From c38d57947cd58f6f2e5d70bf9e38a5fc7ba33337 Mon Sep 17 00:00:00 2001
From: Arnaud Jeansen
Date: Mon, 21 Jan 2019 03:50:16 +0100
Subject: [PATCH] * Add `IteratorOverClassifierChoicesExample`,
 `OrientationAndScriptDetectionExample`, and `ResultIteratorExample` for
 Tesseract (pull #675)

---
 .../IteratorOverClassifierChoicesExample.java | 89 +++++++++++++++++++
 .../OrientationAndScriptDetectionExample.java | 78 ++++++++++++++++
 .../tesseract/ResultIteratorExample.java      | 78 ++++++++++++++++
 3 files changed, 245 insertions(+)
 create mode 100644 tesseract/samples/src/main/java/org/bytedeco/javacpp/samples/tesseract/IteratorOverClassifierChoicesExample.java
 create mode 100644 tesseract/samples/src/main/java/org/bytedeco/javacpp/samples/tesseract/OrientationAndScriptDetectionExample.java
 create mode 100644 tesseract/samples/src/main/java/org/bytedeco/javacpp/samples/tesseract/ResultIteratorExample.java

diff --git a/tesseract/samples/src/main/java/org/bytedeco/javacpp/samples/tesseract/IteratorOverClassifierChoicesExample.java b/tesseract/samples/src/main/java/org/bytedeco/javacpp/samples/tesseract/IteratorOverClassifierChoicesExample.java
new file mode 100644
index 00000000000..0c1b761babd
--- /dev/null
+++ b/tesseract/samples/src/main/java/org/bytedeco/javacpp/samples/tesseract/IteratorOverClassifierChoicesExample.java
@@ -0,0 +1,89 @@
+
+package org.bytedeco.javacpp.samples.tesseract;
+
+import static org.bytedeco.javacpp.lept.*;
+import static org.bytedeco.javacpp.tesseract.*;
+
+import java.io.File;
+import java.net.URL;
+
+import org.bytedeco.javacpp.BytePointer;
+import org.bytedeco.javacpp.Loader;
+
+/**
+ * Recognizes a sample image symbol by symbol and prints, for each symbol, the
+ * alternative classifier choices along with their confidence.
+ * <p>
+ * To run this program, you need to configure:
+ * <ul>
+ * <li>the {@code TESSDATA_PREFIX} environment variable, naming the directory whose
+ * {@code tessdata} subdirectory holds the {@code eng} language data.</li>
+ * </ul>
+ *
+ * @author Arnaud Jeansen
+ */
+public class IteratorOverClassifierChoicesExample {
+    public static void main(String[] args) throws Exception {
+        // Fail early with a clear message rather than handing "null/tessdata" to Init()
+        String tessdataPrefix = System.getenv("TESSDATA_PREFIX");
+        if (tessdataPrefix == null) {
+            System.err.println("The TESSDATA_PREFIX environment variable is not set.");
+            System.exit(1);
+        }
+
+        TessBaseAPI api = new TessBaseAPI();
+        // Initialize tesseract-ocr with English, initializing tessdata path with the standard ENV variable
+        if (api.Init(tessdataPrefix + "/tessdata", "eng") != 0) {
+            System.err.println("Could not initialize tesseract.");
+            System.exit(1);
+        }
+
+        // Open input image with leptonica library
+        URL url = new URL("https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/Computer_modern_sample.svg/1920px-Computer_modern_sample.svg.png");
+        File file = Loader.cacheResource(url);
+        PIX image = pixRead(file.getAbsolutePath());
+        if (image == null) {
+            System.err.println("Could not read image " + file.getAbsolutePath());
+            api.End();
+            System.exit(1);
+        }
+        api.SetImage(image);
+
+        ETEXT_DESC recoc = TessMonitorCreate();
+        api.Recognize(recoc);
+
+        ResultIterator ri = api.GetIterator();
+        int pageIteratorLevel = RIL_SYMBOL;
+        if (ri != null) {
+            do {
+                BytePointer outText = ri.GetUTF8Text(pageIteratorLevel);
+                float conf = ri.Confidence(pageIteratorLevel);
+                String symbolInformation = String.format("symbol: '%s'; \tconf: %.2f", outText.getString(), conf);
+                System.out.println(symbolInformation);
+                outText.deallocate();
+
+                boolean indent = false;
+                ChoiceIterator ci = TessResultIteratorGetChoiceIterator(ri);
+                do {
+                    if (indent) {
+                        System.out.print("\t\t");
+                    }
+                    System.out.print("\t-");
+                    // This text is owned by the choice iterator: it must not be deallocated here
+                    BytePointer choiceText = ci.GetUTF8Text();
+                    System.out.println(String.format("%s conf: %f", choiceText.getString(), ci.Confidence()));
+                    indent = true;
+                } while (ci.Next());
+                // Each choice iterator is a separate native object that the caller must delete
+                TessChoiceIteratorDelete(ci);
+            } while (ri.Next(pageIteratorLevel));
+            // The result iterator points into the API's data: delete it before calling End()
+            TessResultIteratorDelete(ri);
+        }
+
+        // Destroy used object and release memory
+        TessMonitorDelete(recoc);
+        api.End();
+        pixDestroy(image);
+    }
+}
diff --git a/tesseract/samples/src/main/java/org/bytedeco/javacpp/samples/tesseract/OrientationAndScriptDetectionExample.java b/tesseract/samples/src/main/java/org/bytedeco/javacpp/samples/tesseract/OrientationAndScriptDetectionExample.java
new file mode 100644
index 00000000000..a7b9955e780
--- /dev/null
+++ b/tesseract/samples/src/main/java/org/bytedeco/javacpp/samples/tesseract/OrientationAndScriptDetectionExample.java
@@ -0,0 +1,78 @@
+
+package org.bytedeco.javacpp.samples.tesseract;
+
+import static org.bytedeco.javacpp.lept.*;
+import static org.bytedeco.javacpp.tesseract.*;
+
+import java.io.File;
+import java.net.URL;
+
+import org.bytedeco.javacpp.BytePointer;
+import org.bytedeco.javacpp.Loader;
+import org.bytedeco.javacpp.tesseract;
+
+/**
+ * Analyses the layout of a sample image and prints its orientation, writing
+ * direction, textline order and deskew angle.
+ * <p>
+ * To run this program, you need to configure:
+ * <ul>
+ * <li>the {@code TESSDATA_PREFIX} environment variable, naming the directory whose
+ * {@code tessdata} subdirectory holds the {@code eng} language data.</li>
+ * </ul>
+ *
+ * @author Arnaud Jeansen
+ */
+public class OrientationAndScriptDetectionExample {
+    public static void main(String[] args) throws Exception {
+        // Fail early with a clear message rather than handing "null/tessdata" to Init()
+        String tessdataPrefix = System.getenv("TESSDATA_PREFIX");
+        if (tessdataPrefix == null) {
+            System.err.println("The TESSDATA_PREFIX environment variable is not set.");
+            System.exit(1);
+        }
+
+        TessBaseAPI api = new TessBaseAPI();
+        // Initialize tesseract-ocr with English, initializing tessdata path with the standard ENV variable
+        if (api.Init(tessdataPrefix + "/tessdata", "eng") != 0) {
+            System.err.println("Could not initialize tesseract.");
+            System.exit(1);
+        }
+
+        // Open input image with leptonica library
+        URL url = new URL("https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/Computer_modern_sample.svg/1920px-Computer_modern_sample.svg.png");
+        File file = Loader.cacheResource(url);
+        PIX image = pixRead(file.getAbsolutePath());
+        if (image == null) {
+            System.err.println("Could not read image " + file.getAbsolutePath());
+            api.End();
+            System.exit(1);
+        }
+        api.SetPageSegMode(PSM_AUTO_OSD);
+        api.SetImage(image);
+        tesseract.ETEXT_DESC reco = TessMonitorCreate();
+        api.Recognize(reco);
+
+        tesseract.PageIterator iterator = api.AnalyseLayout();
+        if (iterator != null) {
+            int[] orientation = new int[1];
+            int[] writingDirection = new int[1];
+            int[] textlineOrder = new int[1];
+            float[] deskewAngle = new float[1];
+
+            iterator.Orientation(orientation, writingDirection, textlineOrder, deskewAngle);
+            String osdInformation = String.format("Orientation: %d;\nWritingDirection: %d\nTextlineOrder: %d\nDeskew angle: %.4f\n",
+                    orientation[0], writingDirection[0], textlineOrder[0], deskewAngle[0]);
+            System.out.println(osdInformation);
+            // The page iterator points into the API's data: delete it before calling End()
+            TessPageIteratorDelete(iterator);
+        } else {
+            System.err.println("Could not analyse the page layout.");
+        }
+
+        // Destroy used object and release memory
+        TessMonitorDelete(reco);
+        api.End();
+        pixDestroy(image);
+    }
+}
diff --git a/tesseract/samples/src/main/java/org/bytedeco/javacpp/samples/tesseract/ResultIteratorExample.java b/tesseract/samples/src/main/java/org/bytedeco/javacpp/samples/tesseract/ResultIteratorExample.java
new file mode 100644
index 00000000000..bb83a41a648
--- /dev/null
+++ b/tesseract/samples/src/main/java/org/bytedeco/javacpp/samples/tesseract/ResultIteratorExample.java
@@ -0,0 +1,78 @@
+
+package org.bytedeco.javacpp.samples.tesseract;
+
+import static org.bytedeco.javacpp.lept.*;
+import static org.bytedeco.javacpp.tesseract.*;
+
+import java.io.File;
+import java.net.URL;
+
+import org.bytedeco.javacpp.BytePointer;
+import org.bytedeco.javacpp.Loader;
+import org.bytedeco.javacpp.tesseract;
+
+/**
+ * Recognizes a sample image word by word and prints each word together with
+ * its confidence and bounding box.
+ * <p>
+ * To run this program, you need to configure:
+ * <ul>
+ * <li>the {@code TESSDATA_PREFIX} environment variable, naming the directory whose
+ * {@code tessdata} subdirectory holds the {@code eng} language data.</li>
+ * </ul>
+ *
+ * @author Arnaud Jeansen
+ */
+public class ResultIteratorExample {
+    public static void main(String[] args) throws Exception {
+        // Fail early with a clear message rather than handing "null/tessdata" to Init()
+        String tessdataPrefix = System.getenv("TESSDATA_PREFIX");
+        if (tessdataPrefix == null) {
+            System.err.println("The TESSDATA_PREFIX environment variable is not set.");
+            System.exit(1);
+        }
+
+        TessBaseAPI api = new TessBaseAPI();
+        // Initialize tesseract-ocr with English, initializing tessdata path with the standard ENV variable
+        if (api.Init(tessdataPrefix + "/tessdata", "eng") != 0) {
+            System.err.println("Could not initialize tesseract.");
+            System.exit(1);
+        }
+
+        // Open input image with leptonica library
+        URL url = new URL("https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/Computer_modern_sample.svg/1920px-Computer_modern_sample.svg.png");
+        File file = Loader.cacheResource(url);
+        PIX image = pixRead(file.getAbsolutePath());
+        if (image == null) {
+            System.err.println("Could not read image " + file.getAbsolutePath());
+            api.End();
+            System.exit(1);
+        }
+        api.SetImage(image);
+
+        tesseract.ETEXT_DESC recoc = TessMonitorCreate();
+        api.Recognize(recoc);
+
+        tesseract.ResultIterator ri = api.GetIterator();
+        int pageIteratorLevel = RIL_WORD;
+        if (ri != null) {
+            do {
+                BytePointer outText = ri.GetUTF8Text(pageIteratorLevel);
+                float conf = ri.Confidence(pageIteratorLevel);
+                int[] x1 = new int[1], y1 = new int[1], x2 = new int[1], y2 = new int[1];
+                ri.BoundingBox(pageIteratorLevel, x1, y1, x2, y2);
+                String riInformation = String.format("word: '%s'; \tconf: %.2f; BoundingBox: %d,%d,%d,%d;\n", outText.getString(), conf, x1[0], y1[0], x2[0], y2[0]);
+                System.out.println(riInformation);
+
+                outText.deallocate();
+            } while (ri.Next(pageIteratorLevel));
+            // The result iterator points into the API's data: delete it before calling End()
+            TessResultIteratorDelete(ri);
+        }
+
+        // Destroy used object and release memory
+        TessMonitorDelete(recoc);
+        api.End();
+        pixDestroy(image);
+    }
+}