-
Notifications
You must be signed in to change notification settings - Fork 741
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add `IteratorOverClassifierChoicesExample`, `OrientationAndScriptDetectionExample`, and `ResultIteratorExample` for Tesseract (pull #675)
- Loading branch information
Showing 3 changed files with 197 additions and 0 deletions.
There are no files selected for viewing
73 changes: 73 additions & 0 deletions
73
...ain/java/org/bytedeco/javacpp/samples/tesseract/IteratorOverClassifierChoicesExample.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
|
||
package org.bytedeco.javacpp.samples.tesseract; | ||
|
||
import static org.bytedeco.javacpp.lept.*; | ||
import static org.bytedeco.javacpp.tesseract.*; | ||
|
||
import java.io.File; | ||
import java.net.URL; | ||
|
||
import org.bytedeco.javacpp.BytePointer; | ||
import org.bytedeco.javacpp.Loader; | ||
|
||
/** | ||
* To run this program, you need to configure: | ||
* <ul> | ||
* <li>An environment variable pointing to the dictionaries installed on the system | ||
* TESSDATA_PREFIX=/usr/share/tesseract-ocr/4.00</li> | ||
* <li>An environment variable to tweak the Locale | ||
* LC_ALL=C</li> | ||
* </ul> | ||
* | ||
* @author Arnaud Jeansen | ||
*/ | ||
public class IteratorOverClassifierChoicesExample { | ||
public static void main(String[] args) throws Exception { | ||
BytePointer outText; | ||
BytePointer choiceText; | ||
|
||
TessBaseAPI api = new TessBaseAPI(); | ||
// Initialize tesseract-ocr with English, intializing tessdata path with the standard ENV variable | ||
if (api.Init(System.getenv("TESSDATA_PREFIX") + "/tessdata", "eng") != 0) { | ||
System.err.println("Could not initialize tesseract."); | ||
System.exit(1); | ||
} | ||
|
||
// Open input image with leptonica library | ||
URL url = new URL("https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/Computer_modern_sample.svg/1920px-Computer_modern_sample.svg.png"); | ||
File file = Loader.cacheResource(url); | ||
PIX image = pixRead(file.getAbsolutePath()); | ||
api.SetImage(image); | ||
|
||
ETEXT_DESC recoc = TessMonitorCreate(); | ||
api.Recognize(recoc); | ||
|
||
ResultIterator ri = api.GetIterator(); | ||
int pageIteratorLevel = RIL_SYMBOL; | ||
if (ri != null) { | ||
do { | ||
outText = ri.GetUTF8Text(pageIteratorLevel); | ||
float conf = ri.Confidence(pageIteratorLevel); | ||
String symbolInformation = String.format("symbol: '%s'; \tconf: %.2f", outText.getString(), conf); | ||
System.out.println(symbolInformation); | ||
boolean indent = false; | ||
ChoiceIterator ci = TessResultIteratorGetChoiceIterator(ri); | ||
do { | ||
if (indent) | ||
System.out.print("\t\t"); | ||
System.out.print("\t-"); | ||
choiceText = ci.GetUTF8Text(); | ||
System.out.println(String.format("%s conf: %f", choiceText.getString(), ci.Confidence())); | ||
indent = true; | ||
choiceText.deallocate(); | ||
} while (ci.Next()); | ||
|
||
outText.deallocate(); | ||
} while (ri.Next(pageIteratorLevel)); | ||
} | ||
|
||
// Destroy used object and release memory | ||
api.End(); | ||
pixDestroy(image); | ||
} | ||
} |
60 changes: 60 additions & 0 deletions
60
...ain/java/org/bytedeco/javacpp/samples/tesseract/OrientationAndScriptDetectionExample.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
|
||
package org.bytedeco.javacpp.samples.tesseract; | ||
|
||
import static org.bytedeco.javacpp.lept.*; | ||
import static org.bytedeco.javacpp.tesseract.*; | ||
|
||
import java.io.File; | ||
import java.net.URL; | ||
|
||
import org.bytedeco.javacpp.BytePointer; | ||
import org.bytedeco.javacpp.Loader; | ||
import org.bytedeco.javacpp.tesseract; | ||
|
||
/** | ||
* To run this program, you need to configure: | ||
* <ul> | ||
* <li>An environment variable pointing to the dictionaries installed on the system | ||
* TESSDATA_PREFIX=/usr/share/tesseract-ocr/4.00</li> | ||
* <li>An environment variable to tweak the Locale | ||
* LC_ALL=C</li> | ||
* </ul> | ||
* | ||
* @author Arnaud Jeansen | ||
*/ | ||
public class OrientationAndScriptDetectionExample { | ||
public static void main(String[] args) throws Exception { | ||
BytePointer outText; | ||
|
||
TessBaseAPI api = new TessBaseAPI(); | ||
// Initialize tesseract-ocr with English, intializing tessdata path with the standard ENV variable | ||
if (api.Init(System.getenv("TESSDATA_PREFIX") + "/tessdata", "eng") != 0) { | ||
System.err.println("Could not initialize tesseract."); | ||
System.exit(1); | ||
} | ||
|
||
// Open input image with leptonica library | ||
URL url = new URL("https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/Computer_modern_sample.svg/1920px-Computer_modern_sample.svg.png"); | ||
File file = Loader.cacheResource(url); | ||
PIX image = pixRead(file.getAbsolutePath()); | ||
api.SetPageSegMode(PSM_AUTO_OSD); | ||
api.SetImage(image); | ||
tesseract.ETEXT_DESC reco = TessMonitorCreate(); | ||
api.Recognize(reco); | ||
|
||
tesseract.PageIterator iterator = api.AnalyseLayout(); | ||
int[] orientation = new int[1]; | ||
int[] writing_direction = new int[1]; | ||
int[] textline_order = new int[1]; | ||
float[] deskew_angle = new float[1]; | ||
|
||
iterator.Orientation(orientation, writing_direction, textline_order, deskew_angle); | ||
String osdInformation = String.format("Orientation: %d;\nWritingDirection: %d\nTextlineOrder: %d\nDeskew angle: %.4f\n", | ||
orientation[0], writing_direction[0], textline_order[0], deskew_angle[0]); | ||
System.out.println(osdInformation); | ||
|
||
// Destroy used object and release memory | ||
api.End(); | ||
pixDestroy(image); | ||
} | ||
} |
64 changes: 64 additions & 0 deletions
64
...t/samples/src/main/java/org/bytedeco/javacpp/samples/tesseract/ResultIteratorExample.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
|
||
package org.bytedeco.javacpp.samples.tesseract; | ||
|
||
import static org.bytedeco.javacpp.lept.*; | ||
import static org.bytedeco.javacpp.tesseract.*; | ||
|
||
import java.io.File; | ||
import java.net.URL; | ||
|
||
import org.bytedeco.javacpp.BytePointer; | ||
import org.bytedeco.javacpp.Loader; | ||
import org.bytedeco.javacpp.tesseract; | ||
|
||
/** | ||
* To run this program, you need to configure: | ||
* <ul> | ||
* <li>An environment variable pointing to the dictionaries installed on the system | ||
* TESSDATA_PREFIX=/usr/share/tesseract-ocr/4.00</li> | ||
* <li>An environment variable to tweak the Locale | ||
* LC_ALL=C</li> | ||
* </ul> | ||
* | ||
* @author Arnaud Jeansen | ||
*/ | ||
public class ResultIteratorExample { | ||
public static void main(String[] args) throws Exception { | ||
BytePointer outText; | ||
|
||
TessBaseAPI api = new TessBaseAPI(); | ||
// Initialize tesseract-ocr with English, intializing tessdata path with the standard ENV variable | ||
if (api.Init(System.getenv("TESSDATA_PREFIX") + "/tessdata", "eng") != 0) { | ||
System.err.println("Could not initialize tesseract."); | ||
System.exit(1); | ||
} | ||
|
||
// Open input image with leptonica library | ||
URL url = new URL("https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/Computer_modern_sample.svg/1920px-Computer_modern_sample.svg.png"); | ||
File file = Loader.cacheResource(url); | ||
PIX image = pixRead(file.getAbsolutePath()); | ||
api.SetImage(image); | ||
|
||
tesseract.ETEXT_DESC recoc = TessMonitorCreate(); | ||
api.Recognize(recoc); | ||
|
||
tesseract.ResultIterator ri = api.GetIterator(); | ||
int pageIteratorLevel = RIL_WORD; | ||
if (ri != null) { | ||
do { | ||
outText = ri.GetUTF8Text(pageIteratorLevel); | ||
float conf = ri.Confidence(pageIteratorLevel); | ||
int[] x1 = new int[1], y1 = new int[1], x2 = new int[1], y2 = new int[1]; | ||
ri.BoundingBox(pageIteratorLevel, x1, y1, x2, y2); | ||
String riInformation = String.format("word: '%s'; \tconf: %.2f; BoundingBox: %d,%d,%d,%d;\n", outText.getString(), conf, x1[0], y1[0], x2[0], y2[0]); | ||
System.out.println(riInformation); | ||
|
||
outText.deallocate(); | ||
} while (ri.Next(pageIteratorLevel)); | ||
} | ||
|
||
// Destroy used object and release memory | ||
api.End(); | ||
pixDestroy(image); | ||
} | ||
} |