ts);
+
+ public abstract Builder error(@Nullable Status err);
+
+ public abstract ImageText build();
+ }
+}
diff --git a/vision/text/src/main/java/com/google/cloud/vision/samples/text/Index.java b/vision/text/src/main/java/com/google/cloud/vision/samples/text/Index.java
new file mode 100644
index 00000000000..4c739a66dfd
--- /dev/null
+++ b/vision/text/src/main/java/com/google/cloud/vision/samples/text/Index.java
@@ -0,0 +1,219 @@
+/*
+ * Copyright 2016 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.cloud.vision.samples.text;
+
+import com.google.common.collect.ImmutableSet;
+
+import opennlp.tools.stemmer.Stemmer;
+import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.tokenize.TokenizerModel;
+
+import redis.clients.jedis.Jedis;
+import redis.clients.jedis.JedisPool;
+import redis.clients.jedis.JedisPoolConfig;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Path;
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+import java.util.stream.Stream;
+
+/**
+ * An inverted index using Redis.
+ *
+ * <p>The {@code Index} records the files in which each keyword stem was found and supports
+ * queries on that record. Redis database {@code TOKEN_DB} maps a stem to the set of document
+ * paths that contain it, and database {@code DOCS_DB} maps a document path to its stored text.
+ */
+public class Index {
+  private static final int TOKEN_DB = 0;
+  private static final int DOCS_DB = 1;
+
+  /**
+   * Parses tokenizer data and creates a tokenizer model.
+   *
+   * @return the model read from the file {@code en-token.bin}
+   * @throws IOException if the model file cannot be read
+   */
+  public static TokenizerModel getEnglishTokenizerMeModel() throws IOException {
+    try (InputStream modelIn = new FileInputStream("en-token.bin")) {
+      return new TokenizerModel(modelIn);
+    }
+  }
+
+  /**
+   * Creates a Redis connection pool for the server on {@code localhost}.
+   */
+  public static JedisPool getJedisPool() {
+    return new JedisPool(new JedisPoolConfig(), "localhost");
+  }
+
+  private final Tokenizer tokenizer;
+  private final Stemmer stemmer;
+  private final JedisPool pool;
+
+  /**
+   * Constructs a connection to the index.
+   *
+   * @param tokenizer splits document text into tokens
+   * @param stemmer reduces each token to its stem
+   * @param pool supplies the Redis connections used for every read and write
+   */
+  public Index(Tokenizer tokenizer, Stemmer stemmer, JedisPool pool) {
+    this.tokenizer = tokenizer;
+    this.stemmer = stemmer;
+    this.pool = pool;
+  }
+
+  /**
+   * Prints {@code words} information from the index: each document that contains every word,
+   * followed by the text stored for that document.
+   */
+  public void printLookup(Iterable<String> words) {
+    ImmutableSet<String> hits = lookup(words);
+    if (hits.isEmpty()) {
+      System.out.print("No hits found.\n\n");
+      return;
+    }
+    // Borrow one connection for all the reads instead of one per hit.
+    try (Jedis jedis = pool.getResource()) {
+      jedis.select(DOCS_DB);
+      for (String document : hits) {
+        System.out.printf("***Image %s has text:\n%s\n", document, jedis.get(document));
+      }
+    }
+  }
+
+  /**
+   * Looks up the set of documents containing each word. Returns the intersection of these.
+   *
+   * <p>Each word is trimmed, lower-cased and stemmed before the lookup, so input such as
+   * {@code "cat, dog"} matches the same keys that indexing wrote. Blank words are ignored.
+   *
+   * @return the paths of documents containing every word; empty when no word is usable
+   */
+  public ImmutableSet<String> lookup(Iterable<String> words) {
+    Set<String> documents = null;
+    try (Jedis jedis = pool.getResource()) {
+      jedis.select(TOKEN_DB);
+      for (String word : words) {
+        String trimmed = word.trim();
+        if (trimmed.isEmpty()) {
+          continue;
+        }
+        Set<String> matches = jedis.smembers(stemOf(trimmed));
+        if (documents == null) {
+          documents = new HashSet<>(matches);
+        } else {
+          documents.retainAll(matches);
+        }
+        if (documents.isEmpty()) {
+          // The intersection cannot grow again, so skip the remaining round trips.
+          break;
+        }
+      }
+    }
+    if (documents == null) {
+      return ImmutableSet.of();
+    }
+    return ImmutableSet.copyOf(documents);
+  }
+
+  /**
+   * Checks if the document at {@code path} needs to be processed.
+   *
+   * @return {@code true} only if nothing, not even an empty string, is stored for {@code path}
+   */
+  public boolean isDocumentUnprocessed(Path path) {
+    try (Jedis jedis = pool.getResource()) {
+      jedis.select(DOCS_DB);
+      String result = jedis.get(path.toString());
+      if (result == null) {
+        return true;
+      }
+      if (result.isEmpty()) {
+        System.out.printf("File %s was already checked, and contains no text.\n", path);
+        return false;
+      }
+      System.out.printf("%s already added to index.\n", path);
+      return false;
+    }
+  }
+
+  /**
+   * Extracts all tokens from a {@code document} as a stream.
+   *
+   * @return one {@code Word} per token, each carrying the document's path; a single empty word
+   *     when the tokenizer finds no tokens
+   */
+  public Stream<Word> extractTokens(Word document) {
+    Stream.Builder<Word> output = Stream.builder();
+    String[] words = tokenizer.tokenize(document.word());
+    // Ensure we track empty documents throughout so that they are not reprocessed.
+    if (words.length == 0) {
+      output.add(Word.builder().path(document.path()).word("").build());
+    }
+    for (String token : words) {
+      output.add(Word.builder().path(document.path()).word(token).build());
+    }
+    return output.build();
+  }
+
+  /**
+   * Extracts the stem from a {@code word}.
+   *
+   * @return a {@code Word} with the same path whose text is the stem of the lower-cased input
+   */
+  public Word stem(Word word) {
+    return Word.builder().path(word.path()).word(stemOf(word.word())).build();
+  }
+
+  /**
+   * Adds a {@code document} to the index: stores its text under its path, then indexes the stem
+   * of every token against that path.
+   */
+  public void addDocument(Word document) {
+    try (Jedis jedis = pool.getResource()) {
+      jedis.select(DOCS_DB);
+      jedis.set(document.path().toString(), document.word());
+    }
+    extractTokens(document)
+        .map(this::stem)
+        .forEach(this::add);
+  }
+
+  /**
+   * Adds a {@code word} to the index, keyed by its lower-cased text.
+   */
+  public void add(Word word) {
+    try (Jedis jedis = pool.getResource()) {
+      jedis.select(TOKEN_DB);
+      jedis.sadd(word.word().toLowerCase(Locale.ROOT), word.path().toString());
+    }
+  }
+
+  /**
+   * Lower-cases {@code text} and returns its stem. Indexing and lookup both go through here so
+   * that they agree on the key for a word whatever its original case.
+   */
+  private String stemOf(String text) {
+    return stemmer.stem(text.toLowerCase(Locale.ROOT)).toString();
+  }
+}
diff --git a/vision/text/src/main/java/com/google/cloud/vision/samples/text/TextApp.java b/vision/text/src/main/java/com/google/cloud/vision/samples/text/TextApp.java
new file mode 100644
index 00000000000..1ca841069ef
--- /dev/null
+++ b/vision/text/src/main/java/com/google/cloud/vision/samples/text/TextApp.java
@@ -0,0 +1,290 @@
+/*
+ * Copyright 2016 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.cloud.vision.samples.text;
+
+import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
+import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
+import com.google.api.client.json.JsonFactory;
+import com.google.api.client.json.jackson2.JacksonFactory;
+import com.google.api.services.vision.v1.Vision;
+import com.google.api.services.vision.v1.VisionScopes;
+import com.google.api.services.vision.v1.model.AnnotateImageRequest;
+import com.google.api.services.vision.v1.model.AnnotateImageResponse;
+import com.google.api.services.vision.v1.model.BatchAnnotateImagesRequest;
+import com.google.api.services.vision.v1.model.BatchAnnotateImagesResponse;
+import com.google.api.services.vision.v1.model.EntityAnnotation;
+import com.google.api.services.vision.v1.model.Feature;
+import com.google.api.services.vision.v1.model.Image;
+import com.google.api.services.vision.v1.model.Status;
+import com.google.common.base.MoreObjects;
+import com.google.common.base.Splitter;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+
+import opennlp.tools.stemmer.snowball.SnowballStemmer;
+import opennlp.tools.tokenize.TokenizerME;
+
+import redis.clients.jedis.JedisPool;
+
+import java.io.Console;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.security.GeneralSecurityException;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+
+/**
+ * A sample application that uses the Vision API to OCR text in an image.
+ *
+ * <p>Given a directory argument, it indexes the text found in the images under that directory;
+ * given no argument, it answers word lookups typed at the console.
+ */
+public class TextApp {
+  private static final int MAX_RESULTS = 6;
+  private static final int BATCH_SIZE = 10;
+
+  /**
+   * Be sure to specify the name of your application. If the application name is {@code null} or
+   * blank, the application will log a warning. Suggested format is "MyCompany-ProductName/1.0".
+   */
+  private static final String APPLICATION_NAME = "Google-VisionTextSample/1.0";
+
+  /**
+   * Connects to the Vision API using Application Default Credentials.
+   */
+  public static Vision getVisionService() throws IOException, GeneralSecurityException {
+    GoogleCredential credential =
+        GoogleCredential.getApplicationDefault().createScoped(VisionScopes.all());
+    JsonFactory jsonFactory = JacksonFactory.getDefaultInstance();
+    return new Vision.Builder(GoogleNetHttpTransport.newTrustedTransport(), jsonFactory, credential)
+        .setApplicationName(APPLICATION_NAME)
+        .build();
+  }
+
+  /**
+   * Indexes the images under the directory named by the single argument, or, when run without
+   * arguments, looks up words entered at the console.
+   */
+  public static void main(String[] args) throws IOException, GeneralSecurityException {
+    if (args.length > 1) {
+      System.err.println("Usage:");
+      System.err.printf(
+          "\tjava %s inputDirectory\n",
+          TextApp.class.getCanonicalName());
+      System.exit(1);
+    }
+
+    JedisPool pool = Index.getJedisPool();
+    try {
+      Index index =
+          new Index(
+              new TokenizerME(Index.getEnglishTokenizerMeModel()),
+              new SnowballStemmer(SnowballStemmer.ALGORITHM.ENGLISH),
+              pool);
+      TextApp app = new TextApp(TextApp.getVisionService(), index);
+
+      if (args.length == 0) {
+        app.lookupWords();
+        return;
+      }
+      app.indexDirectory(Paths.get(args[0]));
+    } finally {
+      pool.destroy();
+    }
+  }
+
+  private final Vision vision;
+  private final Index index;
+
+  /**
+   * Constructs a {@code TextApp} using the {@link Vision} service.
+   *
+   * @param vision the client used to send text-detection requests
+   * @param index the index that detected text is written to and looked up in
+   */
+  public TextApp(Vision vision, Index index) {
+    this.vision = vision;
+    this.index = index;
+  }
+
+  /**
+   * Looks up words in the index that the user enters into the console.
+   *
+   * <p>Prompts repeatedly until a blank line or the end of input is read. Exits the JVM with
+   * status 1 if no console is attached.
+   */
+  public void lookupWords() {
+    System.out.println("Entering word lookup mode.");
+    System.out
+        .println("To index a directory, add an input path argument when you run this command.");
+    System.out.println();
+
+    Console console = System.console();
+    if (console == null) {
+      System.err.println("No console.");
+      System.exit(1);
+    }
+
+    while (true) {
+      String line = console.readLine("Enter word(s) (comma-separated, leave blank to exit): ");
+      // readLine returns null at end of input; treat that like a blank line instead of failing.
+      if (line == null) {
+        break;
+      }
+      String words = line.trim();
+      if (words.isEmpty()) {
+        break;
+      }
+      index.printLookup(Splitter.on(',').split(words));
+    }
+  }
+
+  /**
+   * Indexes all the images in the {@code inputPath} directory for text.
+   *
+   * <p>Files already recorded in the index are skipped. The rest are sent for text detection in
+   * batches of {@code BATCH_SIZE}; images that produced an error are reported and not indexed.
+   *
+   * @throws IOException if walking {@code inputPath} fails
+   */
+  public void indexDirectory(Path inputPath) throws IOException {
+    List<Path> unprocessedImages;
+    // Files.walk holds open directories until the stream is closed, so close it promptly.
+    try (Stream<Path> files = Files.walk(inputPath)) {
+      unprocessedImages =
+          files
+              .filter(Files::isRegularFile)
+              .filter(index::isDocumentUnprocessed)
+              .collect(Collectors.toList());
+    }
+    Lists.partition(unprocessedImages, BATCH_SIZE)
+        .stream()
+        .map(this::detectText)
+        .flatMap(batch -> batch.stream())
+        .filter(this::successfullyDetectedText)
+        .map(this::extractDescriptions)
+        .forEach(index::addDocument);
+  }
+
+  /**
+   * Gets up to {@code MAX_RESULTS} text annotations for images stored at {@code paths}.
+   *
+   * <p>All images are sent in one batch request. If reading any file or calling the service
+   * throws an {@link IOException}, or the service answers with a different number of responses
+   * than images, every path in the batch is returned with that error attached.
+   *
+   * @return one {@code ImageText} per path, in the same order as {@code paths}
+   */
+  public ImmutableList<ImageText> detectText(List<Path> paths) {
+    if (paths.isEmpty()) {
+      // Nothing to annotate, so skip the remote call.
+      return ImmutableList.of();
+    }
+    ImmutableList.Builder<AnnotateImageRequest> requests = ImmutableList.builder();
+    try {
+      for (Path path : paths) {
+        byte[] data = Files.readAllBytes(path);
+        requests.add(
+            new AnnotateImageRequest()
+                .setImage(new Image().encodeContent(data))
+                .setFeatures(ImmutableList.of(
+                    new Feature()
+                        .setType("TEXT_DETECTION")
+                        .setMaxResults(MAX_RESULTS))));
+      }
+
+      Vision.Images.Annotate annotate =
+          vision.images()
+              .annotate(new BatchAnnotateImagesRequest().setRequests(requests.build()));
+      // Due to a bug: requests to Vision API containing large images fail when GZipped.
+      annotate.setDisableGZipContent(true);
+      List<AnnotateImageResponse> responses = annotate.execute().getResponses();
+      // Responses are matched to paths by position, so the counts must agree. An assert is
+      // not enough here because assertions are normally disabled at run time.
+      int received = responses == null ? 0 : responses.size();
+      if (received != paths.size()) {
+        throw new IOException(
+            String.format("Expected %d responses but received %d.", paths.size(), received));
+      }
+
+      ImmutableList.Builder<ImageText> output = ImmutableList.builder();
+      for (int i = 0; i < paths.size(); i++) {
+        AnnotateImageResponse response = responses.get(i);
+        output.add(
+            ImageText.builder()
+                .path(paths.get(i))
+                .textAnnotations(
+                    MoreObjects.firstNonNull(
+                        response.getTextAnnotations(),
+                        ImmutableList.<EntityAnnotation>of()))
+                .error(response.getError())
+                .build());
+      }
+      return output.build();
+    } catch (IOException ex) {
+      // Got an exception, which means the whole batch had an error.
+      ImmutableList.Builder<ImageText> output = ImmutableList.builder();
+      for (Path path : paths) {
+        output.add(
+            ImageText.builder()
+                .path(path)
+                .textAnnotations(ImmutableList.<EntityAnnotation>of())
+                .error(new Status().setMessage(ex.getMessage()))
+                .build());
+      }
+      return output.build();
+    }
+  }
+
+  /**
+   * Checks that there was not an error processing an {@code image}.
+   *
+   * @return {@code true} if {@code image} carries no error; otherwise prints the error and
+   *     returns {@code false}
+   */
+  public boolean successfullyDetectedText(ImageText image) {
+    if (image.error() != null) {
+      System.out.printf("Error reading %s:\n%s\n", image.path(), image.error().getMessage());
+      return false;
+    }
+    return true;
+  }
+
+  /**
+   * Extracts, as a combined string, all the descriptions from text annotations on an
+   * {@code image}.
+   *
+   * @return a {@code Word} pairing the image's path with the concatenated descriptions
+   */
+  public Word extractDescriptions(ImageText image) {
+    StringBuilder document = new StringBuilder();
+    for (EntityAnnotation text : image.textAnnotations()) {
+      document.append(text.getDescription());
+    }
+    if (document.length() == 0) {
+      System.out.printf("%s had no discernible text.\n", image.path());
+    }
+    // Output a progress indicator.
+    System.out.print('.');
+    System.out.flush();
+    return Word.builder().path(image.path()).word(document.toString()).build();
+  }
+}
diff --git a/vision/text/src/main/java/com/google/cloud/vision/samples/text/Word.java b/vision/text/src/main/java/com/google/cloud/vision/samples/text/Word.java
new file mode 100644
index 00000000000..0dfb4f24ed0
--- /dev/null
+++ b/vision/text/src/main/java/com/google/cloud/vision/samples/text/Word.java
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2016 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.cloud.vision.samples.text;
+
+import com.google.auto.value.AutoValue;
+
+import java.nio.file.Path;
+
+/**
+ * A value type that associates a piece of text with the path of a file.
+ */
+@AutoValue
+abstract class Word {
+
+  /** Returns the file path this word is associated with. */
+  public abstract Path path();
+
+  /** Returns the text of this word. */
+  public abstract String word();
+
+  /** Returns a new builder for {@code Word} instances. */
+  public static Builder builder() {
+    return new AutoValue_Word.Builder();
+  }
+
+  /**
+   * Builder for {@link Word}.
+   */
+  @AutoValue.Builder
+  public abstract static class Builder {
+    public abstract Builder word(String word);
+
+    public abstract Builder path(Path path);
+
+    public abstract Word build();
+  }
+}
diff --git a/vision/text/src/test/java/com/google/cloud/vision/samples/text/TextAppIT.java b/vision/text/src/test/java/com/google/cloud/vision/samples/text/TextAppIT.java
new file mode 100644
index 00000000000..2e1867ebe7e
--- /dev/null
+++ b/vision/text/src/test/java/com/google/cloud/vision/samples/text/TextAppIT.java
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2016 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.cloud.vision.samples.text;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import com.google.common.collect.ImmutableList;
+
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
+import java.util.Locale;
+
+/**
+ * Integration (system) tests for {@link TextApp}.
+ */
+@RunWith(JUnit4.class)
+@SuppressWarnings("checkstyle:abbreviationaswordinname")
+public class TextAppIT {
+  private TextApp appUnderTest;
+
+  @Before public void setUp() throws Exception {
+    // No index is supplied: the methods exercised below never use it.
+    appUnderTest = new TextApp(TextApp.getVisionService(), null /* index */);
+  }
+
+  @Test public void extractDescriptions_withImage_returnsText() throws Exception {
+    // Arrange
+    List<ImageText> image =
+        appUnderTest.detectText(ImmutableList.<Path>of(Paths.get("data/wakeupcat.jpg")));
+
+    // Act
+    Word word = appUnderTest.extractDescriptions(image.get(0));
+
+    // Assert
+    assertThat(word.path().toString())
+        .named("wakeupcat.jpg path")
+        .isEqualTo("data/wakeupcat.jpg");
+    // The expectations are lower case, so normalize the detected text once before comparing.
+    String text = word.word().toLowerCase(Locale.ROOT);
+    assertThat(text).named("wakeupcat.jpg word").contains("wake");
+    assertThat(text).named("wakeupcat.jpg word").contains("up");
+    assertThat(text).named("wakeupcat.jpg word").contains("human");
+  }
+}
diff --git a/vision/text/src/test/java/com/google/cloud/vision/samples/text/TextAppTest.java b/vision/text/src/test/java/com/google/cloud/vision/samples/text/TextAppTest.java
new file mode 100644
index 00000000000..9ada8e0ad7f
--- /dev/null
+++ b/vision/text/src/test/java/com/google/cloud/vision/samples/text/TextAppTest.java
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2016 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.cloud.vision.samples.text;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import com.google.api.client.http.HttpTransport;
+import com.google.api.client.http.LowLevelHttpRequest;
+import com.google.api.client.http.LowLevelHttpResponse;
+import com.google.api.client.json.Json;
+import com.google.api.client.json.JsonFactory;
+import com.google.api.client.json.jackson2.JacksonFactory;
+import com.google.api.client.testing.http.MockHttpTransport;
+import com.google.api.client.testing.http.MockLowLevelHttpRequest;
+import com.google.api.client.testing.http.MockLowLevelHttpResponse;
+import com.google.api.services.vision.v1.Vision;
+import com.google.common.collect.ImmutableList;
+
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
+
+/**
+ * Unit tests for {@link TextApp}.
+ */
+@RunWith(JUnit4.class)
+public class TextAppTest {
+  private TextApp appUnderTest;
+
+  @Before public void setUp() throws Exception {
+    // Mock out the vision service for unit tests.
+    JsonFactory jsonFactory = JacksonFactory.getDefaultInstance();
+    HttpTransport transport = new MockHttpTransport() {
+      @Override
+      public LowLevelHttpRequest buildRequest(String method, String url) throws IOException {
+        return new MockLowLevelHttpRequest() {
+          @Override
+          public LowLevelHttpResponse execute() throws IOException {
+            // Every request gets one successful response with no text annotations.
+            MockLowLevelHttpResponse response = new MockLowLevelHttpResponse();
+            response.setStatusCode(200);
+            response.setContentType(Json.MEDIA_TYPE);
+            response.setContent("{\"responses\": [{\"textAnnotations\": []}]}");
+            return response;
+          }
+        };
+      }
+    };
+    Vision vision = new Vision(transport, jsonFactory, null);
+
+    appUnderTest = new TextApp(vision, null /* index */);
+  }
+
+  @Test public void detectText_withImage_returnsPath() throws Exception {
+    List<ImageText> image =
+        appUnderTest.detectText(ImmutableList.<Path>of(Paths.get("data/wakeupcat.jpg")));
+
+    assertThat(image).hasSize(1);
+    assertThat(image.get(0).path().toString())
+        .named("wakeupcat.jpg path")
+        .isEqualTo("data/wakeupcat.jpg");
+  }
+}