Skip to content

Commit

Permalink
* Add IteratorOverClassifierChoicesExample, `OrientationAndScriptD…
Browse files Browse the repository at this point in the history
…etectionExample`, and `ResultIteratorExample` for Tesseract (pull #675)
  • Loading branch information
ajeans authored and saudet committed Jan 21, 2019
1 parent 577fe8c commit c38d579
Show file tree
Hide file tree
Showing 3 changed files with 197 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@

package org.bytedeco.javacpp.samples.tesseract;

import static org.bytedeco.javacpp.lept.*;
import static org.bytedeco.javacpp.tesseract.*;

import java.io.File;
import java.net.URL;

import org.bytedeco.javacpp.BytePointer;
import org.bytedeco.javacpp.Loader;

/**
* To run this program, you need to configure:
* <ul>
* <li>An environment variable pointing to the dictionaries installed on the system
* TESSDATA_PREFIX=/usr/share/tesseract-ocr/4.00</li>
* <li>An environment variable to tweak the Locale
* LC_ALL=C</li>
* </ul>
*
* @author Arnaud Jeansen
*/
public class IteratorOverClassifierChoicesExample {
    /**
     * Runs OCR on a sample image and prints every recognized symbol with its confidence,
     * followed by each alternative classifier choice for that symbol and its confidence.
     *
     * @param args unused
     * @throws Exception if building the URL or caching the remote image fails
     */
    public static void main(String[] args) throws Exception {
        // Fail early with a clear message instead of passing "null/tessdata" to Init
        String tessdataPrefix = System.getenv("TESSDATA_PREFIX");
        if (tessdataPrefix == null) {
            System.err.println("Environment variable TESSDATA_PREFIX is not set.");
            System.exit(1);
        }

        TessBaseAPI api = new TessBaseAPI();
        // Initialize tesseract-ocr with English, initializing tessdata path with the standard ENV variable
        if (api.Init(tessdataPrefix + "/tessdata", "eng") != 0) {
            System.err.println("Could not initialize tesseract.");
            System.exit(1);
        }

        // Open input image with leptonica library
        URL url = new URL("https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/Computer_modern_sample.svg/1920px-Computer_modern_sample.svg.png");
        File file = Loader.cacheResource(url);
        PIX image = pixRead(file.getAbsolutePath());
        if (image == null) {
            // pixRead signals failure by returning null; do not hand that to tesseract
            System.err.println("Could not read image " + file.getAbsolutePath());
            api.End();
            System.exit(1);
        }
        api.SetImage(image);

        ETEXT_DESC recoc = TessMonitorCreate();
        if (api.Recognize(recoc) != 0) {
            System.err.println("Recognition failed.");
            api.End();
            pixDestroy(image);
            System.exit(1);
        }

        // NOTE(review): the iterators and the monitor are never explicitly released here;
        // confirm whether the bindings expose delete functions that should be called.
        ResultIterator ri = api.GetIterator();
        int pageIteratorLevel = RIL_SYMBOL;
        if (ri != null) {
            do {
                BytePointer outText = ri.GetUTF8Text(pageIteratorLevel);
                if (outText == null) {
                    // Nothing recognized at this position (e.g. empty page)
                    continue;
                }
                float conf = ri.Confidence(pageIteratorLevel);
                String symbolInformation = String.format("symbol: '%s'; \tconf: %.2f", outText.getString(), conf);
                System.out.println(symbolInformation);

                boolean indent = false;
                ChoiceIterator ci = TessResultIteratorGetChoiceIterator(ri);
                do {
                    BytePointer choiceText = ci.GetUTF8Text();
                    if (choiceText == null) {
                        // No classifier choice available for this symbol
                        continue;
                    }
                    if (indent) {
                        System.out.print("\t\t");
                    }
                    System.out.print("\t-");
                    System.out.println(String.format("%s conf: %f", choiceText.getString(), ci.Confidence()));
                    indent = true;
                    // The choice text is owned by the ChoiceIterator and must not be freed by the caller
                } while (ci.Next());

                // Unlike the choice text, the symbol text is owned by the caller
                outText.deallocate();
            } while (ri.Next(pageIteratorLevel));
        }

        // Destroy used object and release memory
        api.End();
        pixDestroy(image);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@

package org.bytedeco.javacpp.samples.tesseract;

import static org.bytedeco.javacpp.lept.*;
import static org.bytedeco.javacpp.tesseract.*;

import java.io.File;
import java.net.URL;

import org.bytedeco.javacpp.BytePointer;
import org.bytedeco.javacpp.Loader;
import org.bytedeco.javacpp.tesseract;

/**
* To run this program, you need to configure:
* <ul>
* <li>An environment variable pointing to the dictionaries installed on the system
* TESSDATA_PREFIX=/usr/share/tesseract-ocr/4.00</li>
* <li>An environment variable to tweak the Locale
* LC_ALL=C</li>
* </ul>
*
* @author Arnaud Jeansen
*/
public class OrientationAndScriptDetectionExample {
    /**
     * Runs page layout analysis with orientation and script detection on a sample image
     * and prints the detected orientation, writing direction, textline order and deskew angle.
     *
     * @param args unused
     * @throws Exception if building the URL or caching the remote image fails
     */
    public static void main(String[] args) throws Exception {
        // Fail early with a clear message instead of passing "null/tessdata" to Init
        String tessdataPrefix = System.getenv("TESSDATA_PREFIX");
        if (tessdataPrefix == null) {
            System.err.println("Environment variable TESSDATA_PREFIX is not set.");
            System.exit(1);
        }

        TessBaseAPI api = new TessBaseAPI();
        // Initialize tesseract-ocr with English, initializing tessdata path with the standard ENV variable
        if (api.Init(tessdataPrefix + "/tessdata", "eng") != 0) {
            System.err.println("Could not initialize tesseract.");
            System.exit(1);
        }

        // Open input image with leptonica library
        URL url = new URL("https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/Computer_modern_sample.svg/1920px-Computer_modern_sample.svg.png");
        File file = Loader.cacheResource(url);
        PIX image = pixRead(file.getAbsolutePath());
        if (image == null) {
            // pixRead signals failure by returning null; do not hand that to tesseract
            System.err.println("Could not read image " + file.getAbsolutePath());
            api.End();
            System.exit(1);
        }
        api.SetPageSegMode(PSM_AUTO_OSD);
        api.SetImage(image);
        tesseract.ETEXT_DESC reco = TessMonitorCreate();
        if (api.Recognize(reco) != 0) {
            System.err.println("Recognition failed.");
            api.End();
            pixDestroy(image);
            System.exit(1);
        }

        // NOTE(review): the page iterator and the monitor are never explicitly released here;
        // confirm whether the bindings expose delete functions that should be called.
        tesseract.PageIterator iterator = api.AnalyseLayout();
        if (iterator == null) {
            // Layout analysis produced no result (e.g. blank image)
            System.err.println("Could not analyse page layout.");
            api.End();
            pixDestroy(image);
            System.exit(1);
        }

        // Single-element arrays act as out-parameters for the native call
        int[] orientation = new int[1];
        int[] writing_direction = new int[1];
        int[] textline_order = new int[1];
        float[] deskew_angle = new float[1];

        iterator.Orientation(orientation, writing_direction, textline_order, deskew_angle);
        String osdInformation = String.format("Orientation: %d;\nWritingDirection: %d\nTextlineOrder: %d\nDeskew angle: %.4f\n",
                orientation[0], writing_direction[0], textline_order[0], deskew_angle[0]);
        System.out.println(osdInformation);

        // Destroy used object and release memory
        api.End();
        pixDestroy(image);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@

package org.bytedeco.javacpp.samples.tesseract;

import static org.bytedeco.javacpp.lept.*;
import static org.bytedeco.javacpp.tesseract.*;

import java.io.File;
import java.net.URL;

import org.bytedeco.javacpp.BytePointer;
import org.bytedeco.javacpp.Loader;
import org.bytedeco.javacpp.tesseract;

/**
* To run this program, you need to configure:
* <ul>
* <li>An environment variable pointing to the dictionaries installed on the system
* TESSDATA_PREFIX=/usr/share/tesseract-ocr/4.00</li>
* <li>An environment variable to tweak the Locale
* LC_ALL=C</li>
* </ul>
*
* @author Arnaud Jeansen
*/
public class ResultIteratorExample {
    /**
     * Runs OCR on a sample image and prints every recognized word together with its
     * confidence and bounding box.
     *
     * @param args unused
     * @throws Exception if building the URL or caching the remote image fails
     */
    public static void main(String[] args) throws Exception {
        // Fail early with a clear message instead of passing "null/tessdata" to Init
        String tessdataPrefix = System.getenv("TESSDATA_PREFIX");
        if (tessdataPrefix == null) {
            System.err.println("Environment variable TESSDATA_PREFIX is not set.");
            System.exit(1);
        }

        TessBaseAPI api = new TessBaseAPI();
        // Initialize tesseract-ocr with English, initializing tessdata path with the standard ENV variable
        if (api.Init(tessdataPrefix + "/tessdata", "eng") != 0) {
            System.err.println("Could not initialize tesseract.");
            System.exit(1);
        }

        // Open input image with leptonica library
        URL url = new URL("https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/Computer_modern_sample.svg/1920px-Computer_modern_sample.svg.png");
        File file = Loader.cacheResource(url);
        PIX image = pixRead(file.getAbsolutePath());
        if (image == null) {
            // pixRead signals failure by returning null; do not hand that to tesseract
            System.err.println("Could not read image " + file.getAbsolutePath());
            api.End();
            System.exit(1);
        }
        api.SetImage(image);

        tesseract.ETEXT_DESC recoc = TessMonitorCreate();
        if (api.Recognize(recoc) != 0) {
            System.err.println("Recognition failed.");
            api.End();
            pixDestroy(image);
            System.exit(1);
        }

        // NOTE(review): the result iterator and the monitor are never explicitly released here;
        // confirm whether the bindings expose delete functions that should be called.
        tesseract.ResultIterator ri = api.GetIterator();
        int pageIteratorLevel = RIL_WORD;
        if (ri != null) {
            do {
                BytePointer outText = ri.GetUTF8Text(pageIteratorLevel);
                if (outText == null) {
                    // Nothing recognized at this position (e.g. empty page)
                    continue;
                }
                float conf = ri.Confidence(pageIteratorLevel);
                // Single-element arrays act as out-parameters for the native call
                int[] x1 = new int[1], y1 = new int[1], x2 = new int[1], y2 = new int[1];
                ri.BoundingBox(pageIteratorLevel, x1, y1, x2, y2);
                String riInformation = String.format("word: '%s'; \tconf: %.2f; BoundingBox: %d,%d,%d,%d;\n", outText.getString(), conf, x1[0], y1[0], x2[0], y2[0]);
                System.out.println(riInformation);

                // The word text is owned by the caller and must be released
                outText.deallocate();
            } while (ri.Next(pageIteratorLevel));
        }

        // Destroy used object and release memory
        api.End();
        pixDestroy(image);
    }
}

0 comments on commit c38d579

Please sign in to comment.