Skip to content

Commit

Permalink
Update rectangles to rectangle as only one region can be assigned, fix
Browse files Browse the repository at this point in the history
  • Loading branch information
jeromewu committed Dec 23, 2019
1 parent b8aba2e commit 945f5d3
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 28 deletions.
8 changes: 3 additions & 5 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ Figures out what words are in `image`, where the words are in `image`, etc.

- `image` see [Image Format](./image-format.md) for more details.
- `options` an object of customized options
- `rectangles` an array of objects to specify the regions you want to recognize in the image, each object should contain top, left, width and height, see example below.
- `rectangle` an object to specify the region you want to recognize in the image, the object should contain top, left, width and height, see example below.
- `jobId` Please see details above

**Output:**
Expand All @@ -198,7 +198,7 @@ const { createWorker } = Tesseract;
})();
```

With rectangles
With rectangle

```javascript
const { createWorker } = Tesseract;
Expand All @@ -208,9 +208,7 @@ const { createWorker } = Tesseract;
await worker.loadLanguage('eng');
await worker.initialize('eng');
const { data: { text } } = await worker.recognize(image, {
rectangles: [
{ top: 0, left: 0, width: 100, height: 100 },
],
rectangle: { top: 0, left: 0, width: 100, height: 100 },
});
console.log(text);
})();
Expand Down
99 changes: 87 additions & 12 deletions docs/examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ You can also check [examples](../examples) folder.
### basic

```javascript
import { createWorker } from 'tesseract.js';
const { createWorker } = require('tesseract.js');

const worker = createWorker();

Expand All @@ -22,7 +22,7 @@ const worker = createWorker();
### with detailed progress

```javascript
import { createWorker } from 'tesseract.js';
const { createWorker } = require('tesseract.js');

const worker = createWorker({
logger: m => console.log(m), // Add logger here
Expand All @@ -41,7 +41,7 @@ const worker = createWorker({
### with multiple languages, separated by '+'

```javascript
import { createWorker } from 'tesseract.js';
const { createWorker } = require('tesseract.js');

const worker = createWorker();

Expand All @@ -57,7 +57,7 @@ const worker = createWorker();
### with whitelist char (^2.0.0-beta.1)

```javascript
import { createWorker } from 'tesseract.js';
const { createWorker } = require('tesseract.js');

const worker = createWorker();

Expand All @@ -79,7 +79,7 @@ const worker = createWorker();
Check here for more details of pageseg mode: https://github.com/tesseract-ocr/tesseract/blob/4.0.0/src/ccstruct/publictypes.h#L163

```javascript
import { createWorker, PSM } from 'tesseract.js';
const { createWorker, PSM } = require('tesseract.js');

const worker = createWorker();

Expand All @@ -103,30 +103,105 @@ Please check **examples** folder for details.
Browser: [download-pdf.html](../examples/browser/download-pdf.html)
Node: [download-pdf.js](../examples/node/download-pdf.js)

### with only part of the image (^2.0.0-beta.1)
### with only part of the image (^2.0.1)

**One rectangle**

```javascript
import { createWorker } from 'tesseract.js';
const { createWorker } = require('tesseract.js');

const worker = createWorker();
const rectangle = { left: 0, top: 0, width: 500, height: 250 };

(async () => {
await worker.load();
await worker.loadLanguage('eng');
await worker.initialize('eng');
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle });
console.log(text);
await worker.terminate();
})();
```

**Multiple Rectangles**

```javascript
const { createWorker } = require('tesseract.js');

const worker = createWorker();
const rectangles = [
{ left: 0, top: 0, width: 500, height: 250 },
{
left: 0,
top: 0,
width: 500,
height: 250,
},
{
left: 500,
top: 0,
width: 500,
height: 250,
},
];

(async () => {
await worker.load();
await worker.loadLanguage('eng');
await worker.initialize('eng');
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png', 'eng', { rectangles });
console.log(text);
const values = [];
for (let i = 0; i < rectangles.length; i++) {
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle: rectangles[i] });
values.push(text);
}
console.log(values);
await worker.terminate();
})();
```

**Multiple Rectangles (with scheduler to do recognition in parallel)**

```javascript
const { createWorker, createScheduler } = require('tesseract.js');

const scheduler = createScheduler();
const worker1 = createWorker();
const worker2 = createWorker();
const rectangles = [
{
left: 0,
top: 0,
width: 500,
height: 250,
},
{
left: 500,
top: 0,
width: 500,
height: 250,
},
];

(async () => {
await worker1.load();
await worker2.load();
await worker1.loadLanguage('eng');
await worker2.loadLanguage('eng');
await worker1.initialize('eng');
await worker2.initialize('eng');
scheduler.addWorker(worker1);
scheduler.addWorker(worker2);
const results = await Promise.all(rectangles.map((rectangle) => (
scheduler.addJob('recognize', 'https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle })
)));
console.log(results.map(r => r.data.text));
await scheduler.terminate();
})();
```

### with multiple workers to speed up (^2.0.0-beta.1)

```javascript
import { createWorker, createScheduler } from 'tesseract.js';
const { createWorker, createScheduler } = require('tesseract.js');

const scheduler = createScheduler();
const worker1 = createWorker();
Expand All @@ -143,7 +218,7 @@ const worker2 = createWorker();
scheduler.addWorker(worker2);
/** Add 10 recognition jobs */
const results = await Promise.all(Array(10).fill(0).map(() => (
await scheduler.addJob('recognize', 'https://tesseract.projectnaptha.com/img/eng_bw.png')
scheduler.addJob('recognize', 'https://tesseract.projectnaptha.com/img/eng_bw.png')
)))
console.log(results);
await scheduler.terminate(); // It also terminates all workers.
Expand Down
10 changes: 4 additions & 6 deletions src/worker-script/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -187,14 +187,12 @@ const initialize = ({
}
};

const recognize = ({ payload: { image, options: { rectangles = [] } } }, res) => {
const recognize = ({ payload: { image, options: { rectangle: rec } } }, res) => {
try {
const ptr = setImage(TessModule, api, image);
rectangles.forEach(({
left, top, width, height,
}) => {
api.SetRectangle(left, top, width, height);
});
if (typeof rec === 'object') {
api.SetRectangle(rec.left, rec.top, rec.width, rec.height);
}
api.Recognize(null);
res.resolve(dump(TessModule, api, params));
TessModule._free(ptr);
Expand Down
8 changes: 3 additions & 5 deletions tests/recognize.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,9 @@ describe('recognize()', () => {
const { data: { text } } = await worker.recognize(
`${IMAGE_PATH}/${name}`,
{
rectangles: [
{
top, left, width, height,
},
],
rectangle: {
top, left, width, height,
},
},
);
expect(text).to.be(ans);
Expand Down

0 comments on commit 945f5d3

Please sign in to comment.