Skip to content

Commit

Permalink
Merge branch 'features/unicode-format' of https://github.com/mattnotm…
Browse files Browse the repository at this point in the history
…itt/CyberChef into mattnotmitt-features/unicode-format
  • Loading branch information
n1474335 committed Feb 1, 2021
2 parents 09c6e18 + bf14c89 commit 0a09492
Show file tree
Hide file tree
Showing 6 changed files with 153 additions and 24 deletions.
1 change: 1 addition & 0 deletions src/core/config/Categories.json
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@
"ops": [
"Encode text",
"Decode text",
"Unicode Text Format",
"Remove Diacritics",
"Unescape Unicode Characters",
"Convert to NATO alphabet"
Expand Down
2 changes: 1 addition & 1 deletion src/core/operations/RemoveDiacritics.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class RemoveDiacritics extends Operation {

this.name = "Remove Diacritics";
this.module = "Default";
this.description = "Replaces accented characters with their latin character equivalent.";
this.description = "Replaces accented characters with their latin character equivalent. Accented characters are made up of Unicode combining characters, so unicode text formatting such as strikethroughs and underlines will also be removed.";
this.infoURL = "https://wikipedia.org/wiki/Diacritic";
this.inputType = "string";
this.outputType = "string";
Expand Down
67 changes: 67 additions & 0 deletions src/core/operations/UnicodeTextFormat.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/**
* @author Matt C [me@mitt.dev]
* @copyright Crown Copyright 2020
* @license Apache-2.0
*/

import Operation from "../Operation.mjs";
import Utils from "../Utils.mjs";

/**
* Unicode Text Format operation
*/
class UnicodeTextFormat extends Operation {

    /**
     * UnicodeTextFormat constructor
     *
     * Declares the operation's metadata and its two boolean arguments
     * (Underline, Strikethrough), in the order run() destructures them.
     */
    constructor() {
        super();

        this.name = "Unicode Text Format";
        this.module = "Default";
        this.description = "Adds Unicode combining characters to change formatting of plaintext.";
        this.infoURL = "https://en.wikipedia.org/wiki/Combining_character";
        this.inputType = "byteArray";
        this.outputType = "byteArray";
        this.args = [
            {
                name: "Underline",
                type: "boolean",
                // Must be a real boolean: the string "false" is truthy and
                // would switch the option on wherever the default is used.
                value: false
            },
            {
                name: "Strikethrough",
                type: "boolean",
                value: false
            }
        ];
    }

    /**
     * Appends the selected combining characters (U+0336 strikethrough,
     * U+0332 underline) after every character of the UTF-8 encoded input.
     *
     * The marks are inserted only after the final byte of each encoded
     * character, so multi-byte UTF-8 sequences are never split.
     *
     * @param {byteArray} input - UTF-8 encoded text
     * @param {Object[]} args - [underline, strikethrough]
     * @returns {byteArray} UTF-8 encoded text with the combining characters added
     */
    run(input, args) {
        const [underline, strikethrough] = args;

        // Encode the marks once; strikethrough precedes underline when both are set.
        const marks = [];
        if (strikethrough) {
            marks.push(...Utils.strToUtf8ByteArray("\u0336"));
        }
        if (underline) {
            marks.push(...Utils.strToUtf8ByteArray("\u0332"));
        }

        // Built with a plain loop rather than [].concat(...chunks): spreading one
        // argument per input byte can exceed the engine's argument limit on large inputs.
        const output = [];
        for (let i = 0; i < input.length; i++) {
            output.push(input[i]);

            // A following UTF-8 continuation byte (10xxxxxx) means the current
            // character is not finished yet, so the marks must wait.
            const hasNext = i + 1 < input.length;
            const midCharacter = hasNext && (input[i + 1] & 0xC0) === 0x80;
            if (!midCharacter) {
                for (const markByte of marks) {
                    output.push(markByte);
                }
            }
        }
        return output;
    }

}

export default UnicodeTextFormat;
2 changes: 1 addition & 1 deletion tests/operations/index.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ import "./tests/ParseQRCode.mjs";
import "./tests/PowerSet.mjs";
import "./tests/Regex.mjs";
import "./tests/Register.mjs";
import "./tests/RemoveDiacritics.mjs";
import "./tests/Rotate.mjs";
import "./tests/SeqUtils.mjs";
import "./tests/SetDifference.mjs";
Expand Down Expand Up @@ -101,6 +100,7 @@ import "./tests/LuhnChecksum.mjs";
import "./tests/CipherSaber2.mjs";
import "./tests/Colossus.mjs";
import "./tests/ParseObjectIDTimestamp.mjs";
import "./tests/Unicode.mjs";


// Cannot test operations that use the File type yet
Expand Down
22 changes: 0 additions & 22 deletions tests/operations/tests/RemoveDiacritics.mjs

This file was deleted.

83 changes: 83 additions & 0 deletions tests/operations/tests/Unicode.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/**
* Unicode operation tests.
*
* @author Matt C [me@mitt.dev]
* @author Klaxon [klaxon@veyr.com]
*
* @copyright Crown Copyright 2020
* @license Apache-2.0
*/
import TestRegister from "../../lib/TestRegister.mjs";

// Registers the test cases for the "Unicode Text Format" and "Remove Diacritics" operations.
// For "Unicode Text Format" the args are given as [Underline, Strikethrough].
TestRegister.addTests([
{
// Underline only: U+0332 is expected after the character.
name: "Unicode Text Format: underline",
input: "a",
expectedOutput: "a\u0332",
recipeConfig: [
{
"op": "Unicode Text Format",
"args": [true, false],
}
],
},
{
// Strikethrough only: U+0336 is expected after the character.
name: "Unicode Text Format: strikethrough",
input: "a",
expectedOutput: "a\u0336",
recipeConfig: [
{
"op": "Unicode Text Format",
"args": [false, true],
}
],
},
{
// Both options: the expected order is strikethrough (U+0336) then underline (U+0332).
name: "Unicode Text Format: both",
input: "a",
expectedOutput: "a\u0336\u0332",
recipeConfig: [
{
"op": "Unicode Text Format",
"args": [true, true],
}
],
},
{
// Round trip: format the text, then expect Remove Diacritics to give back the plain input.
name: "Remove Diacritics: text formatting",
input: "a",
expectedOutput: "a",
recipeConfig: [
{
"op": "Unicode Text Format",
"args": [true, true],
},
{
"op": "Remove Diacritics",
"args": []
}
],
},
{
// One "a" carrying a long run of combining marks; only the bare letter is expected back.
name: "Remove Diacritics: all diacritical marks one char",
input: "à̴̵̶̷̸̡̢̧̨̛̖̗̘̙̜̝̞̟̠̣̤̥̦̩̪̫̬̭̮̯̰̱̲̳̹̺̻̼́̂̃̄̅̆̇̈̉̊̋̌̍̎̏̐̑̒̓̔̽̾̿̀́͂̓̈́̕̚͠͡ͅ", // sorry about this line lol
expectedOutput: "a",
recipeConfig: [
{
"op": "Remove Diacritics",
"args": []
}
],
},
{
// Accented Latin letters written as escaped code points, each expected to map to its unaccented letter.
name: "Remove Diacritics: default",
input: "\xe0, \xe8, \xec, \xf2, \xf9 \xc0, \xc8, \xcc, \xd2, \xd9\n\xe1, \xe9, \xed, \xf3, \xfa, \xfd \xc1, \xc9, \xcd, \xd3, \xda, \xdd\n\xe2, \xea, \xee, \xf4, \xfb \xc2, \xca, \xce, \xd4, \xdb\n\xe3, \xf1, \xf5 \xc3, \xd1, \xd5\n\xe4, \xeb, \xef, \xf6, \xfc, \xff \xc4, \xcb, \xcf, \xd6, \xdc, \u0178\n\xe5, \xc5",
expectedOutput: "a, e, i, o, u A, E, I, O, U\na, e, i, o, u, y A, E, I, O, U, Y\na, e, i, o, u A, E, I, O, U\na, n, o A, N, O\na, e, i, o, u, y A, E, I, O, U, Y\na, A",
recipeConfig: [
{
"op": "Remove Diacritics",
"args": []
},
],
},
]);

0 comments on commit 0a09492

Please sign in to comment.