diff --git a/src/core/operations/OpticalCharacterRecognition.mjs b/src/core/operations/OpticalCharacterRecognition.mjs index 6262df7b4..dfcff9654 100644 --- a/src/core/operations/OpticalCharacterRecognition.mjs +++ b/src/core/operations/OpticalCharacterRecognition.mjs @@ -12,9 +12,10 @@ import { isImage } from "../lib/FileType.mjs"; import { toBase64 } from "../lib/Base64.mjs"; import { isWorkerEnvironment } from "../Utils.mjs"; -import process from "process"; import { createWorker } from "tesseract.js"; +const OEM_MODES = ["Tesseract only", "LSTM only", "Tesseract/LSTM Combined"]; + /** * Optical Character Recognition operation */ @@ -37,6 +38,12 @@ class OpticalCharacterRecognition extends Operation { name: "Show confidence", type: "boolean", value: true + }, + { + name: "OCR Engine Mode", + type: "option", + value: OEM_MODES, + defaultIndex: 1 } ]; } @@ -47,7 +54,7 @@ class OpticalCharacterRecognition extends Operation { * @returns {string} */ async run(input, args) { - const [showConfidence] = args; + const [showConfidence, oemChoice] = args; if (!isWorkerEnvironment()) throw new OperationError("This operation only works in a browser"); @@ -56,12 +63,13 @@ class OpticalCharacterRecognition extends Operation { throw new OperationError("Unsupported file type (supported: jpg,png,pbm,bmp) or no file provided"); } - const assetDir = isWorkerEnvironment() ? `${self.docURL}/assets/` : `${process.cwd()}/src/core/vendor/`; + const assetDir = `${self.docURL}/assets/`; + const oem = OEM_MODES.indexOf(oemChoice); try { self.sendStatusMessage("Spinning up Tesseract worker..."); const image = `data:${type};base64,${toBase64(input)}`; - const worker = createWorker({ + const worker = await createWorker("eng", oem, { workerPath: `${assetDir}tesseract/worker.min.js`, langPath: `${assetDir}tesseract/lang-data`, corePath: `${assetDir}tesseract/tesseract-core.wasm.js`, @@ -71,11 +79,6 @@ class OpticalCharacterRecognition extends Operation { } } }); - await worker.load(); - self.sendStatusMessage(`Loading English language pack...`); - await worker.loadLanguage("eng"); - self.sendStatusMessage("Intialising Tesseract API..."); - await worker.initialize("eng"); self.sendStatusMessage("Finding text..."); const result = await worker.recognize(image); diff --git a/tests/browser/02_ops.js b/tests/browser/02_ops.js index 37f75f584..70cfd3ba1 100644 --- a/tests/browser/02_ops.js +++ b/tests/browser/02_ops.js @@ -236,7 +236,7 @@ module.exports = { // testOp(browser, "OR", "test input", "test_output"); // testOp(browser, "Object Identifier to Hex", "test input", "test_output"); testOpHtml(browser, "Offset checker", "test input\n\nbest input", ".hl5", "est input"); - // testOp(browser, "Optical Character Recognition", "test input", "test_output"); + testOpFile(browser, "Optical Character Recognition", "files/testocr.png", false, /This is a lot of 12 point text to test the/, [], 10000); // testOp(browser, "PEM to Hex", "test input", "test_output"); // testOp(browser, "PGP Decrypt", "test input", "test_output"); // testOp(browser, "PGP Decrypt and Verify", "test input", "test_output"); @@ -408,7 +408,7 @@ module.exports = { * @param {Browser} browser - Nightwatch client * @param {string|Array} opName - name of operation to be tested, array for multiple ops * @param {string} input - input text for test - * @param {Array|Array>} args - arguments, nested if multiple ops + * @param {Array|Array>} [args=[]] - arguments, nested if multiple ops */ function bakeOp(browser, opName, input, args=[]) { browser.perform(function() { @@ -425,8 +425,8 @@ function bakeOp(browser, opName, input, args=[]) { * @param {Browser} browser - Nightwatch client * @param {string|Array} opName - name of operation to be tested, array for multiple ops * @param {string} input - input text - * @param {string} output - expected output - * @param {Array|Array>} args - arguments, nested if multiple ops + * @param {string|RegExp} output - expected output + * @param {Array|Array>} [args=[]] - arguments, nested if multiple ops */ function testOp(browser, opName, input, output, args=[]) { bakeOp(browser, opName, input, args); @@ -440,8 +440,8 @@ function testOp(browser, opName, input, output, args=[]) { * @param {string|Array} opName - name of operation to be tested array for multiple ops * @param {string} input - input text * @param {string} cssSelector - CSS selector for HTML output - * @param {string} output - expected output - * @param {Array|Array>} args - arguments, nested if multiple ops + * @param {string|RegExp} output - expected output + * @param {Array|Array>} [args=[]] - arguments, nested if multiple ops */ function testOpHtml(browser, opName, input, cssSelector, output, args=[]) { bakeOp(browser, opName, input, args); @@ -459,9 +459,9 @@ function testOpHtml(browser, opName, input, cssSelector, output, args=[]) { * @param {Browser} browser - Nightwatch client * @param {string|Array} opName - name of operation to be tested array for multiple ops * @param {string} filename - filename of image file from samples directory - * @param {Array|Array>} args - arguments, nested if multiple ops + * @param {Array|Array>} [args=[]] - arguments, nested if multiple ops */ -function testOpImage(browser, opName, filename, args) { +function testOpImage(browser, opName, filename, args=[]) { browser.perform(function() { console.log(`Current test: ${opName}`); }); @@ -481,11 +481,12 @@ function testOpImage(browser, opName, filename, args) { * @param {Browser} browser - Nightwatch client * @param {string|Array} opName - name of operation to be tested array for multiple ops * @param {string} filename - filename of file from samples directory - * @param {string} cssSelector - CSS selector for HTML output - * @param {string} output - expected output - * @param {Array|Array>} args - arguments, nested if multiple ops + * @param {string|boolean} cssSelector - CSS selector for HTML output or false for normal text output + * @param {string|RegExp} output - expected output + * @param {Array|Array>} [args=[]] - arguments, nested if multiple ops + * @param {number} [waitWindow=1000] - The number of milliseconds to wait for the output to be correct */ -function testOpFile(browser, opName, filename, cssSelector, output, args) { +function testOpFile(browser, opName, filename, cssSelector, output, args=[], waitWindow=1000) { browser.perform(function() { console.log(`Current test: ${opName}`); }); @@ -494,9 +495,14 @@ function testOpFile(browser, opName, filename, cssSelector, output, args) { browser.pause(100).waitForElementVisible("#stale-indicator", 5000); utils.bake(browser); - if (typeof output === "string") { + if (!cssSelector) { + // Text output + utils.expectOutput(browser, output, true, waitWindow); + } else if (typeof output === "string") { + // HTML output - string match browser.expect.element("#output-html " + cssSelector).text.that.equals(output); } else if (output instanceof RegExp) { + // HTML output - RegEx match browser.expect.element("#output-html " + cssSelector).text.that.matches(output); } } diff --git a/tests/browser/browserUtils.js b/tests/browser/browserUtils.js index dc0774af9..7711c004b 100644 --- a/tests/browser/browserUtils.js +++ b/tests/browser/browserUtils.js @@ -180,15 +180,16 @@ function loadRecipe(browser, opName, input, args) { * @param {Browser} browser - Nightwatch client * @param {string|RegExp} expected - The expected output value * @param {boolean} [waitNotNull=false] - Wait for the output to not be empty before testing the value + * @param {number} [waitWindow=1000] - The number of milliseconds to wait for the output to be correct */ -function expectOutput(browser, expected, waitNotNull=false) { +function expectOutput(browser, expected, waitNotNull=false, waitWindow=1000) { if (waitNotNull && expected !== "") { browser.waitUntil(async function() { const output = await this.execute(function() { return window.app.manager.output.outputEditorView.state.doc.toString(); }); return output.length; - }, 1000); + }, waitWindow); } browser.execute(expected => { diff --git a/tests/samples/files/testocr.png b/tests/samples/files/testocr.png new file mode 100644 index 000000000..ce8d0e78b Binary files /dev/null and b/tests/samples/files/testocr.png differ