Skip to content

Commit

Permalink
Tidied up the Magic operation
Browse files Browse the repository at this point in the history
  • Loading branch information
n1474335 committed Mar 24, 2020
1 parent 26fa66e commit b765534
Show file tree
Hide file tree
Showing 43 changed files with 716 additions and 845 deletions.
24 changes: 1 addition & 23 deletions src/core/config/scripts/generateConfig.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -43,31 +43,9 @@ for (const opObj in Ops) {
flowControl: op.flowControl,
manualBake: op.manualBake,
args: op.args,
checks: op.checks
};

// Mirror the operation's declared checks into its config entry. For each
// direction that is present on op.checks, a fresh object is attached and
// only the criteria the operation actually declares are copied across.
if ("checks" in op) {
    const criteriaByDirection = {
        "input": ["regex", "entropy"],
        "output": ["regex", "entropy", "mime"]
    };

    Object.keys(criteriaByDirection).forEach(direction => {
        if (!(direction in op.checks)) return;

        const declared = op.checks[direction],
            copied = {};
        operationConfig[op.name][direction] = copied;

        criteriaByDirection[direction].forEach(key => {
            if (key in declared) copied[key] = declared[key];
        });
    });
}
if (!(op.module in modules))
modules[op.module] = {};
modules[op.module][op.name] = opObj;
Expand Down
227 changes: 104 additions & 123 deletions src/core/lib/Magic.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -19,68 +19,42 @@ class Magic {
* Magic constructor.
*
* @param {ArrayBuffer} buf
* @param {Object} prevOp
* @param {Object[]} [opCriteria]
* @param {Object} [prevOp]
*/
constructor(buf, opPatterns, prevOp) {
constructor(buf, opCriteria=Magic._generateOpCriteria(), prevOp=null) {
this.inputBuffer = new Uint8Array(buf);
this.inputStr = Utils.arrayBufferToStr(buf);
this.opPatterns = opPatterns || Magic._generateOpCriteria();
this.opCriteria = opCriteria;
this.prevOp = prevOp;
}

/**
* Finds operations that claim to be able to decode the input based on
* regular expression matches.
* Finds operations that claim to be able to decode the input based on various criteria.
*
* @param {[Object]} opPatterns
* @returns {Array}
*/
inputRegexMatch(opPatterns) {
const matches = [];

for (let i = 0; i < opPatterns.length; i++) {
const pattern = opPatterns[i];


if (pattern.match.test(this.inputStr)) {
matches.push(pattern);
}
}

return matches;
}

/**
* Finds operations that claim to be able to decode the input based on entropy
* matches.
*
* @param {[Object]} opPatterns
* @returns {Array}
* @returns {Object[]}
*/
entropyInputMatch(opPatterns) {
const matches = [];
findMatchingInputOps() {
const matches = [],
inputEntropy = this.calcEntropy();

this.opCriteria.forEach(check => {
// If the input doesn't lie in the required entropy range, move on
if (check.entropyRange &&
(inputEntropy < check.entropyRange[0] ||
inputEntropy > check.entropyRange[1]))
return;
// If the input doesn't match the pattern, move on
if (check.pattern &&
!check.pattern.test(this.inputStr))
return;

const entropyOfInput = this.calcEntropy();
matches.push(check);
});

for (let i = 0; i < opPatterns.length; i++) {
const currOp = opPatterns[i];
if ((entropyOfInput > currOp.entropy[0]) && (entropyOfInput < currOp.entropy[1]))
matches.push(currOp);
}
return matches;
}

/**
* Finds operations that claim to be able to decode the input based on criteria.
*
* @returns {Object[]}
*/
findMatchingInputOps() {
let matches = this.inputRegexMatch(this.opPatterns.regex);
matches = matches.concat(this.entropyInputMatch(this.opPatterns.entropy));
return [...new Set(matches)];
}

/**
* Attempts to detect the language of the input by comparing its byte frequency
* to that of several known languages.
Expand Down Expand Up @@ -218,8 +192,10 @@ class Magic {
*
* @returns {number}
*/
calcEntropy() {
const prob = this._freqDist();
calcEntropy(data=this.inputBuffer, standalone=false) {
if (!standalone && this.inputEntropy) return this.inputEntropy;

const prob = this._freqDist(data, standalone);
let entropy = 0,
p;

Expand All @@ -228,6 +204,8 @@ class Magic {
if (p === 0) continue;
entropy += p * Math.log(p) / Math.log(2);
}

if (!standalone) this.inputEntropy = -entropy;
return -entropy;
}

Expand Down Expand Up @@ -298,59 +276,57 @@ class Magic {
}

/**
* Checks whether the data passes output criteria for an operation check
*
* @param {ArrayBuffer} data
* @param {Object} criteria
* @returns {boolean}
*/
checkRegexes(regexes) {
for (const elem of regexes) {
const regex = new RegExp(elem.match, elem.flags);
if (regex.test(this.inputStr))
return true;
}
return false;
}
/**
*
*/
checkOutputFromPrevious() {
let score = 0;
if ("regex" in this.prevOp.output) {
if (this.checkRegexes(this.prevOp.output.regex)) score++;
}
if ("entropy" in this.prevOp.output) {
const inputEntropy = this.calcEntropy();
if ((inputEntropy > this.prevOp.output.entropy[0]) && (inputEntropy < this.prevOp.output.entropy[1])) score++;
outputCheckPasses(data, criteria) {
if (criteria.pattern) {
const dataStr = Utils.arrayBufferToStr(data),
regex = new RegExp(criteria.pattern, criteria.flags);
if (!regex.test(dataStr))
return false;
}
if ("mime" in this.prevOp.output) {
if (isType(this.prevOp.output.mime, this.inputBuffer)) score++;
if (criteria.entropyRange) {
const dataEntropy = this.calcEntropy(data, true);
if (dataEntropy < criteria.entropyRange[0] || dataEntropy > criteria.entropyRange[1])
return false;
}
return score > 0;
if (criteria.mime &&
!isType(criteria.mime, data))
return false;

return true;
}

/**
* Speculatively executes matching operations, recording metadata of each result.
*
* @param {number} [depth=0] - How many levels to try to execute
* @param {boolean} [extLang=false] - Extensive language support (false = only check the most
* common Internet languages)
* common Internet languages)
* @param {boolean} [intensive=false] - Run brute-forcing on each branch (significantly affects
* performance)
* performance)
* @param {Object[]} [recipeConfig=[]] - The recipe configuration up to this point
* @param {boolean} [useful=false] - Whether the current recipe should be scored highly
* @param {string} [crib=null] - The regex crib provided by the user, for filtering the operation output
* @param {string} [crib=null] - The regex crib provided by the user, for filtering the operation
* output
* @returns {Object[]} - A sorted list of the recipes most likely to result in correct decoding
*/
async speculativeExecution(depth=0, extLang=false, intensive=false, recipeConfig=[], useful=false, crib=null) {
async speculativeExecution(
depth=0,
extLang=false,
intensive=false,
recipeConfig=[],
useful=false,
crib=null) {

// If we have reached the recursion depth, return
if (depth < 0) return [];

// Find any operations that can be run on this data

if (this.prevOp) {
if ("output" in this.prevOp) {
if (!(this.checkOutputFromPrevious())) {
return [];
}
}
}
const matchingOps = this.findMatchingInputOps();
let results = [];

Expand All @@ -374,20 +350,24 @@ class Magic {
const opConfig = {
op: op.op,
args: op.args
}, output = await this._runRecipe([opConfig]);
},
output = await this._runRecipe([opConfig]);

// If the recipe is repeating and returning the same data, do not continue
if (prevOp && op.op === prevOp.op && _buffersEqual(output, this.inputBuffer)) {
// If the recipe returned an empty buffer, do not continue
if (_buffersEqual(output, new ArrayBuffer())) {
return;
}

// If the recipe returned an empty buffer, do not continue
if (_buffersEqual(output, new ArrayBuffer())) {
// If the recipe is repeating and returning the same data, do not continue
if (prevOp && op.op === prevOp.op && _buffersEqual(output, this.inputBuffer)) {
return;
}

// If the output criteria for this op doesn't match the output, do not continue
if (op.output && !this.outputCheckPasses(output, op.output))
return;

const magic = new Magic(output, this.opPatterns, OperationConfig[op.op]),
const magic = new Magic(output, this.opCriteria, OperationConfig[op.op]),
speculativeResults = await magic.speculativeExecution(
depth-1, extLang, intensive, [...recipeConfig, opConfig], op.useful, crib);

Expand All @@ -399,7 +379,7 @@ class Magic {
const bfEncodings = await this.bruteForce();

await Promise.all(bfEncodings.map(async enc => {
const magic = new Magic(enc.data, this.opPatterns, undefined),
const magic = new Magic(enc.data, this.opCriteria, undefined),
bfResults = await magic.speculativeExecution(
depth-1, extLang, false, [...recipeConfig, enc.conf], false, crib);

Expand All @@ -414,7 +394,8 @@ class Magic {
r.languageScores[0].probability > 0 || // Some kind of language was found
r.fileType || // A file was found
r.isUTF8 || // UTF-8 was found
r.matchingOps.length // A matching op was found
r.matchingOps.length || // A matching op was found
r.matchesCrib // The crib matches
)
);

Expand Down Expand Up @@ -445,9 +426,10 @@ class Magic {
bScore += b.entropy;

// A result with no recipe but matching ops suggests there are better options
if ((!a.recipe.length && a.matchingOps.length) &&
b.recipe.length)
if ((!a.recipe.length && a.matchingOps.length) && b.recipe.length)
return 1;
if ((!b.recipe.length && b.matchingOps.length) && a.recipe.length)
return -1;

return aScore - bScore;
});
Expand Down Expand Up @@ -486,28 +468,32 @@ class Magic {
* Calculates the number of times each byte appears in the input as a percentage
*
* @private
* @param {ArrayBuffer} [data]
* @param {boolean} [standalone]
* @returns {number[]}
*/
_freqDist() {
if (this.freqDist) return this.freqDist;
_freqDist(data=this.inputBuffer, standalone=false) {
if (!standalone && this.freqDist) return this.freqDist;

const len = this.inputBuffer.length;
const len = data.length,
counts = new Array(256).fill(0);
let i = len;
const counts = new Array(256).fill(0);

if (!len) {
this.freqDist = counts;
return this.freqDist;
}

while (i--) {
counts[this.inputBuffer[i]]++;
counts[data[i]]++;
}

this.freqDist = counts.map(c => {
const result = counts.map(c => {
return c / len * 100;
});
return this.freqDist;

if (!standalone) this.freqDist = result;
return result;
}

/**
Expand All @@ -517,30 +503,25 @@ class Magic {
* @returns {Object[]}
*/
static _generateOpCriteria() {
const opCriteria = {
regex: [],
entropy: []
};
const opCriteria = [];

for (const op in OperationConfig) {
if ("input" in OperationConfig[op]) {
if ("regex" in OperationConfig[op].input)
OperationConfig[op].input.regex.forEach(pattern => {
opCriteria.regex.push({
op: op,
match: new RegExp(pattern.match, pattern.flags),
args: pattern.args,
useful: pattern.useful || false
});
});
if ("entropy" in OperationConfig[op].input) {
opCriteria.entropy.push({
op: op,
entropy: OperationConfig[op].input.entropy.input,
args: OperationConfig[op].input.entropy.args
});
}
}
if (!("checks" in OperationConfig[op]))
continue;

OperationConfig[op].checks.forEach(check => {
// Add to the opCriteria list.
// Compile the regex here and cache the compiled version so we
// don't have to keep calculating it.
opCriteria.push({
op: op,
pattern: check.pattern ? new RegExp(check.pattern, check.flags) : null,
args: check.args,
useful: check.useful,
entropyRange: check.entropyRange,
output: check.output
});
});
}

return opCriteria;
Expand Down
Loading

0 comments on commit b765534

Please sign in to comment.