diff --git a/package-lock.json b/package-lock.json index 61c05f2c3..5ede14d1d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8750,7 +8750,8 @@ "version": "2.1.1", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz", "integrity": "sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg==", - "dev": true + "dev": true, + "optional": true }, "p-limit": { "version": "2.3.0", @@ -13306,6 +13307,7 @@ "resolved": "https://registry.npmjs.org/braces/-/braces-2.3.2.tgz", "integrity": "sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==", "dev": true, + "optional": true, "requires": { "arr-flatten": "^1.1.0", "array-unique": "^0.3.2", @@ -13324,6 +13326,7 @@ "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", "dev": true, + "optional": true, "requires": { "is-extendable": "^0.1.0" } @@ -13356,6 +13359,7 @@ "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-4.0.0.tgz", "integrity": "sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=", "dev": true, + "optional": true, "requires": { "extend-shallow": "^2.0.1", "is-number": "^3.0.0", @@ -13368,6 +13372,7 @@ "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", "dev": true, + "optional": true, "requires": { "is-extendable": "^0.1.0" } @@ -13422,13 +13427,15 @@ "version": "1.1.6", "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz", "integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w==", - "dev": true + "dev": true, + "optional": true }, "is-number": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", "dev": true, + "optional": true, "requires": { "kind-of": "^3.0.2" }, @@ -13438,6 +13445,7 @@ "resolved": 
"https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", "dev": true, + "optional": true, "requires": { "is-buffer": "^1.1.5" } @@ -13449,6 +13457,7 @@ "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-3.1.10.tgz", "integrity": "sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==", "dev": true, + "optional": true, "requires": { "arr-diff": "^4.0.0", "array-unique": "^0.3.2", @@ -13482,6 +13491,7 @@ "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-2.1.1.tgz", "integrity": "sha1-fIDBe53+vlmeJzZ+DU3VWQFB2zg=", "dev": true, + "optional": true, "requires": { "is-number": "^3.0.0", "repeat-string": "^1.6.1" diff --git a/src/core/config/Categories.json b/src/core/config/Categories.json index 77e3d3194..ba0b83f4f 100755 --- a/src/core/config/Categories.json +++ b/src/core/config/Categories.json @@ -200,6 +200,7 @@ "ops": [ "Encode text", "Decode text", + "Unicode Text Format", "Remove Diacritics", "Unescape Unicode Characters", "Convert to NATO alphabet" diff --git a/src/core/operations/RemoveDiacritics.mjs b/src/core/operations/RemoveDiacritics.mjs index dd8143758..859d86d78 100644 --- a/src/core/operations/RemoveDiacritics.mjs +++ b/src/core/operations/RemoveDiacritics.mjs @@ -19,7 +19,7 @@ class RemoveDiacritics extends Operation { this.name = "Remove Diacritics"; this.module = "Default"; - this.description = "Replaces accented characters with their latin character equivalent."; + this.description = "Replaces accented characters with their latin character equivalent. 
/**
 * @author Matt C [me@mitt.dev]
 * @copyright Crown Copyright 2020
 * @license Apache-2.0
 */

import Operation from "../Operation.mjs";
import Utils from "../Utils.mjs";

/**
 * Tests whether a byte is a UTF-8 continuation byte (bit pattern 10xxxxxx).
 *
 * @param {number} byte
 * @returns {boolean}
 */
function isUtf8ContinuationByte(byte) {
    return (byte & 0xC0) === 0x80;
}

/**
 * Unicode Text Format operation
 */
class UnicodeTextFormat extends Operation {

    /**
     * UnicodeTextFormat constructor
     */
    constructor() {
        super();

        this.name = "Unicode Text Format";
        this.module = "Default";
        this.description = "Adds Unicode combining characters to change formatting of plaintext.";
        this.infoURL = "https://en.wikipedia.org/wiki/Combining_character";
        this.inputType = "byteArray";
        this.outputType = "byteArray";
        this.args = [
            {
                name: "Underline",
                type: "boolean",
                // Must be a real boolean: the string "false" is truthy.
                value: false
            },
            {
                name: "Strikethrough",
                type: "boolean",
                value: false
            }
        ];
    }

    /**
     * Appends the selected combining marks after every character of the
     * UTF-8 encoded input.
     *
     * Marks are inserted only at character boundaries (never between a lead
     * byte and its continuation bytes), so multi-byte characters stay valid
     * UTF-8. Formatting is applied per code point, not per grapheme cluster.
     *
     * @param {byteArray} input - UTF-8 encoded text
     * @param {Object[]} args - [underline, strikethrough]
     * @returns {byteArray}
     */
    run(input, args) {
        const [underline, strikethrough] = args;

        // Strikethrough is emitted before underline, so enabling both
        // yields "<char>\u0336\u0332".
        const marks = [];
        if (strikethrough) {
            marks.push(...Utils.strToUtf8ByteArray("\u0336"));
        }
        if (underline) {
            marks.push(...Utils.strToUtf8ByteArray("\u0332"));
        }

        // Build the result with a plain loop rather than spreading the whole
        // input into a call, which can overflow the argument limit on large
        // inputs.
        const output = [];
        for (let i = 0; i < input.length; i++) {
            output.push(input[i]);

            // A character ends when there is no next byte, or the next byte
            // starts a new sequence (i.e. is not a continuation byte).
            const isLastByte = i === input.length - 1;
            if (isLastByte || !isUtf8ContinuationByte(input[i + 1])) {
                for (const mark of marks) {
                    output.push(mark);
                }
            }
        }
        return output;
    }

}

export default UnicodeTextFormat;
b/tests/operations/index.mjs index 8d3cd623d..079912565 100644 --- a/tests/operations/index.mjs +++ b/tests/operations/index.mjs @@ -69,7 +69,6 @@ import "./tests/ParseQRCode.mjs"; import "./tests/PowerSet.mjs"; import "./tests/Regex.mjs"; import "./tests/Register.mjs"; -import "./tests/RemoveDiacritics.mjs"; import "./tests/Rotate.mjs"; import "./tests/SeqUtils.mjs"; import "./tests/SetDifference.mjs"; @@ -101,6 +100,7 @@ import "./tests/LuhnChecksum.mjs"; import "./tests/CipherSaber2.mjs"; import "./tests/Colossus.mjs"; import "./tests/ParseObjectIDTimestamp.mjs"; +import "./tests/Unicode.mjs"; // Cannot test operations that use the File type yet diff --git a/tests/operations/tests/RemoveDiacritics.mjs b/tests/operations/tests/RemoveDiacritics.mjs deleted file mode 100644 index c58a2ba60..000000000 --- a/tests/operations/tests/RemoveDiacritics.mjs +++ /dev/null @@ -1,22 +0,0 @@ -/** - * Remove Diacritics tests. - * - * @author Klaxon [klaxon@veyr.com] - * @copyright Crown Copyright 2017 - * @license Apache-2.0 - */ -import TestRegister from "../../lib/TestRegister.mjs"; - -TestRegister.addTests([ - { - name: "Remove Diacritics", - input: "\xe0, \xe8, \xec, \xf2, \xf9 \xc0, \xc8, \xcc, \xd2, \xd9\n\xe1, \xe9, \xed, \xf3, \xfa, \xfd \xc1, \xc9, \xcd, \xd3, \xda, \xdd\n\xe2, \xea, \xee, \xf4, \xfb \xc2, \xca, \xce, \xd4, \xdb\n\xe3, \xf1, \xf5 \xc3, \xd1, \xd5\n\xe4, \xeb, \xef, \xf6, \xfc, \xff \xc4, \xcb, \xcf, \xd6, \xdc, \u0178\n\xe5, \xc5", - expectedOutput: "a, e, i, o, u A, E, I, O, U\na, e, i, o, u, y A, E, I, O, U, Y\na, e, i, o, u A, E, I, O, U\na, n, o A, N, O\na, e, i, o, u, y A, E, I, O, U, Y\na, A", - recipeConfig: [ - { - "op": "Remove Diacritics", - "args": [] - }, - ], - }, -]); diff --git a/tests/operations/tests/Unicode.mjs b/tests/operations/tests/Unicode.mjs new file mode 100644 index 000000000..2603768f6 --- /dev/null +++ b/tests/operations/tests/Unicode.mjs @@ -0,0 +1,83 @@ +/** + * Unicode operation tests. 
+ * + * @author Matt C [me@mitt.dev] + * @author Klaxon [klaxon@veyr.com] + * + * @copyright Crown Copyright 2020 + * @license Apache-2.0 + */ +import TestRegister from "../../lib/TestRegister.mjs"; + +TestRegister.addTests([ + { + name: "Unicode Text Format: underline", + input: "a", + expectedOutput: "a\u0332", + recipeConfig: [ + { + "op": "Unicode Text Format", + "args": [true, false], + } + ], + }, + { + name: "Unicode Text Format: strikethrough", + input: "a", + expectedOutput: "a\u0336", + recipeConfig: [ + { + "op": "Unicode Text Format", + "args": [false, true], + } + ], + }, + { + name: "Unicode Text Format: both", + input: "a", + expectedOutput: "a\u0336\u0332", + recipeConfig: [ + { + "op": "Unicode Text Format", + "args": [true, true], + } + ], + }, + { + name: "Remove Diacritics: text formatting", + input: "a", + expectedOutput: "a", + recipeConfig: [ + { + "op": "Unicode Text Format", + "args": [true, true], + }, + { + "op": "Remove Diacritics", + "args": [] + } + ], + }, + { + name: "Remove Diacritics: all diacritical marks one char", + input: "à̴̵̶̷̸̡̢̧̨̛̖̗̘̙̜̝̞̟̠̣̤̥̦̩̪̫̬̭̮̯̰̱̲̳̹̺̻̼́̂̃̄̅̆̇̈̉̊̋̌̍̎̏̐̑̒̓̔̽̾̿̀́͂̓̈́̕̚͠͡ͅ", // sorry about this line lol + expectedOutput: "a", + recipeConfig: [ + { + "op": "Remove Diacritics", + "args": [] + } + ], + }, + { + name: "Remove Diacritics: default", + input: "\xe0, \xe8, \xec, \xf2, \xf9 \xc0, \xc8, \xcc, \xd2, \xd9\n\xe1, \xe9, \xed, \xf3, \xfa, \xfd \xc1, \xc9, \xcd, \xd3, \xda, \xdd\n\xe2, \xea, \xee, \xf4, \xfb \xc2, \xca, \xce, \xd4, \xdb\n\xe3, \xf1, \xf5 \xc3, \xd1, \xd5\n\xe4, \xeb, \xef, \xf6, \xfc, \xff \xc4, \xcb, \xcf, \xd6, \xdc, \u0178\n\xe5, \xc5", + expectedOutput: "a, e, i, o, u A, E, I, O, U\na, e, i, o, u, y A, E, I, O, U, Y\na, e, i, o, u A, E, I, O, U\na, n, o A, N, O\na, e, i, o, u, y A, E, I, O, U, Y\na, A", + recipeConfig: [ + { + "op": "Remove Diacritics", + "args": [] + }, + ], + }, +]);