Skip to content

Commit

Permalink
#124 - OCR Tesseract Support
Browse files Browse the repository at this point in the history
  • Loading branch information
Mike Friesen authored and mfriesen committed May 11, 2023
1 parent 7739e28 commit f13f65c
Show file tree
Hide file tree
Showing 74 changed files with 4,031 additions and 39 deletions.
3 changes: 3 additions & 0 deletions aws-dynamodb/config/checkstyle/import-control.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
<allow pkg="software.amazon.awssdk.services.dynamodb" />
<allow pkg="com.amazonaws.xray.interceptors" />

<subpackage name="objects">
<allow pkg="java.time" />
</subpackage>
</subpackage>

</import-control>
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package com.formkiq.stacks.dynamodb;
package com.formkiq.aws.dynamodb.objects;

import java.text.SimpleDateFormat;
import java.time.DateTimeException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package com.formkiq.stacks.common.formats;
package com.formkiq.aws.dynamodb.objects;

import static com.formkiq.stacks.common.formats.MimeType.MIME_DOCX;
import static com.formkiq.stacks.common.formats.MimeType.MIME_HTML;
import static com.formkiq.stacks.common.formats.MimeType.MIME_JPEG;
import static com.formkiq.stacks.common.formats.MimeType.MIME_JSON;
import static com.formkiq.stacks.common.formats.MimeType.MIME_PDF;
import static com.formkiq.stacks.common.formats.MimeType.MIME_PNG;
import static com.formkiq.aws.dynamodb.objects.MimeType.MIME_DOCX;
import static com.formkiq.aws.dynamodb.objects.MimeType.MIME_HTML;
import static com.formkiq.aws.dynamodb.objects.MimeType.MIME_JPEG;
import static com.formkiq.aws.dynamodb.objects.MimeType.MIME_JSON;
import static com.formkiq.aws.dynamodb.objects.MimeType.MIME_PDF;
import static com.formkiq.aws.dynamodb.objects.MimeType.MIME_PNG;
import java.util.Set;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package com.formkiq.stacks.common.formats;
package com.formkiq.aws.dynamodb.objects;

/**
* Supported Conversion Formats.
Expand Down
1 change: 1 addition & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ task buildDistribution(type: Copy) {
from 'lambda-api/build/distributions/formkiq-core'
from 'lambda-s3/build/distributions/formkiq-core'
from 'lambda-typesense/build/distributions/formkiq-module-lambda-typesense'
from 'lambda-ocr-tesseract/build/distributions/formkiq-core'
from 'module-email-notify/build/distributions/formkiq-core'
from 'websocket-api/build/distributions/formkiq-core'
into "${buildDir}/distributions/formkiq-core"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
import com.formkiq.aws.dynamodb.model.DocumentTag;
import com.formkiq.aws.dynamodb.model.DocumentTagType;
import com.formkiq.aws.dynamodb.model.DynamicDocumentItem;
import com.formkiq.aws.dynamodb.objects.DateUtil;
import com.formkiq.aws.dynamodb.objects.Objects;
import com.formkiq.aws.dynamodb.objects.Strings;
import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import com.formkiq.aws.dynamodb.model.DocumentSyncServiceType;
import com.formkiq.aws.dynamodb.model.DocumentSyncStatus;
import com.formkiq.aws.dynamodb.model.DocumentSyncType;
import com.formkiq.aws.dynamodb.objects.DateUtil;
import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
import software.amazon.awssdk.services.dynamodb.model.QueryResponse;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import com.formkiq.aws.dynamodb.DynamoDbConnectionBuilder;
import com.formkiq.aws.dynamodb.DynamoDbService;
import com.formkiq.aws.dynamodb.DynamoDbServiceImpl;
import com.formkiq.aws.dynamodb.objects.DateUtil;
import com.formkiq.graalvm.annotations.Reflectable;
import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import com.formkiq.aws.dynamodb.DynamoDbServiceImpl;
import com.formkiq.aws.dynamodb.SiteIdKeyGenerator;
import com.formkiq.aws.dynamodb.model.DocumentItem;
import com.formkiq.aws.dynamodb.objects.DateUtil;
import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
import software.amazon.awssdk.services.dynamodb.model.CancellationReason;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import com.formkiq.aws.dynamodb.PaginationToAttributeValue;
import com.formkiq.aws.dynamodb.QueryResponseToPagination;
import com.formkiq.aws.dynamodb.model.DocumentTag;
import com.formkiq.aws.dynamodb.objects.DateUtil;
import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
import software.amazon.awssdk.services.dynamodb.model.AttributeValueUpdate;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import java.util.TimeZone;
import org.junit.Before;
import org.junit.Test;
import com.formkiq.aws.dynamodb.objects.DateUtil;

/**
* Unit Tests for {@link DateUtil}.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
import com.formkiq.aws.dynamodb.model.SearchMetaCriteria;
import com.formkiq.aws.dynamodb.model.SearchQuery;
import com.formkiq.aws.dynamodb.model.SearchTagCriteria;
import com.formkiq.aws.dynamodb.objects.DateUtil;
import com.formkiq.testutils.aws.DynamoDbExtension;
import com.formkiq.testutils.aws.DynamoDbTestServices;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
import com.formkiq.aws.dynamodb.model.SearchMetaCriteria;
import com.formkiq.aws.dynamodb.model.SearchQuery;
import com.formkiq.aws.dynamodb.model.SearchTagCriteria;
import com.formkiq.aws.dynamodb.objects.DateUtil;
import com.formkiq.testutils.aws.DynamoDbExtension;
import com.formkiq.testutils.aws.DynamoDbTestServices;
import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import java.net.http.HttpRequest.BodyPublishers;
import java.net.http.HttpResponse;
import java.net.http.HttpResponse.BodyHandlers;
import java.util.Base64;
import java.util.List;
import java.util.Map;
import java.util.Optional;
Expand Down Expand Up @@ -124,9 +125,34 @@ public static String addDocumentWithActions(final FormKiqClient client, final St
return client.addDocument(new AddDocumentRequest().siteId(siteId).document(new AddDocument()
.path(path).content(content).contentType(contentType).tags(tags).actions(actions)))
.documentId();

}

/**
 * Add a Document, supplied as raw bytes, together with its Actions and Tags. The content is
 * Base64 encoded before it is placed on the request.
 *
 * @param client {@link FormKiqClient}
 * @param siteId {@link String}
 * @param path {@link String}
 * @param content byte[] raw document content
 * @param contentType {@link String}
 * @param actions {@link List} {@link AddDocumentAction}
 * @param tags {@link List} {@link AddDocumentTag}
 * @return {@link String} the document id read from the add document response
 * @throws IOException IOException
 * @throws InterruptedException InterruptedException
 */
public static String addDocumentWithActions(final FormKiqClient client, final String siteId,
    final String path, final byte[] content, final String contentType,
    final List<AddDocumentAction> actions, final List<AddDocumentTag> tags)
    throws IOException, InterruptedException {

  // Binary content is sent as Base64 text rather than through the plain content field.
  var encodedContent = Base64.getEncoder().encodeToString(content);

  var document = new AddDocument().path(path).contentAsBase64(encodedContent)
      .contentType(contentType).tags(tags).actions(actions);
  var request = new AddDocumentRequest().siteId(siteId).document(document);

  return client.addDocument(request).documentId();
}

/**
* Convert {@link HttpResponse} to {@link Map}.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

import static com.formkiq.testutils.aws.TestServices.BUCKET_NAME;
import static com.formkiq.testutils.aws.TestServices.FORMKIQ_APP_ENVIRONMENT;
import static com.formkiq.testutils.aws.TestServices.OCR_BUCKET_NAME;
import static com.formkiq.testutils.aws.TestServices.STAGE_BUCKET_NAME;
import org.junit.jupiter.api.extension.BeforeAllCallback;
import org.junit.jupiter.api.extension.ExtensionContext;
Expand Down Expand Up @@ -54,6 +55,10 @@ public void beforeAll(final ExtensionContext context) throws Exception {
s3service.createBucket(STAGE_BUCKET_NAME);
}

if (!s3service.exists(OCR_BUCKET_NAME)) {
s3service.createBucket(OCR_BUCKET_NAME);
}

new SsmServiceImpl(TestServices.getSsmConnection(null))
.putParameter("/formkiq/" + FORMKIQ_APP_ENVIRONMENT + "/version", "1.1");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ public final class TestServices {
private static SsmConnectionBuilder ssmConnection;
/** {@link String}. */
public static final String STAGE_BUCKET_NAME = "stagebucket";
/** {@link String}. */
public static final String OCR_BUCKET_NAME = "ocrbucket";

/**
* Get Local Stack Endpoint.
Expand Down
7 changes: 0 additions & 7 deletions lambda-api/Dockerfile

This file was deleted.

6 changes: 4 additions & 2 deletions lambda-api/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ dependencies {
implementation project(':dynamodb-documents')
implementation project(':actions')
implementation project(':typesense')
implementation project(':ocr')
implementation project(':http-sigv4')

implementation group: 'com.amazonaws', name: 'aws-lambda-java-core', version: '1.2.2'
Expand Down Expand Up @@ -97,7 +98,7 @@ task buildJava11Zip(type: Zip) {

task assembleTemplate {
dependsOn buildZip
inputs.files("src/main/resources/cloudformation/template-snippet.yaml", "src/main/resources/cloudformation/api.yaml", "src/main/resources/cloudformation/api-iam.yaml")
inputs.files("src/main/resources/cloudformation/template.yaml", "src/main/resources/cloudformation/template-ocr.yaml", "src/main/resources/cloudformation/api.yaml", "src/main/resources/cloudformation/api-iam.yaml")
outputs.dir("${buildDir}/distributions/formkiq-core")

def randomtext = ('0'..'z').shuffled().take(10).join();
Expand All @@ -107,12 +108,13 @@ task assembleTemplate {

copy {
from layout.buildDirectory.file("${buildDir}/lambda-api-graalvm.zip")
from layout.buildDirectory.file("${buildDir}/layer-tesseract-5.3.1.zip")
into "${buildDir}/distributions/formkiq-core/sam/api"
}

// build SAM distribution
exec {
commandLine "bash", "-c", "ytt --data-value hash=${sha256} -f src/main/resources/cloudformation/template-snippet.yaml -f src/main/resources/cloudformation/api.yaml -f src/main/resources/cloudformation/api-iam.yaml > ${buildDir}/distributions/formkiq-core/sam/api/template.yaml"
commandLine "bash", "-c", "ytt --data-value hash=${sha256} -f src/main/resources/cloudformation/template.yaml -f src/main/resources/cloudformation/template-ocr.yaml -f src/main/resources/cloudformation/api.yaml -f src/main/resources/cloudformation/api-iam.yaml > ${buildDir}/distributions/formkiq-core/sam/api/template.yaml"
}
}
}
Expand Down
5 changes: 3 additions & 2 deletions lambda-api/config/checkstyle/import-control.xml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@

<allow pkg="com.amazonaws.services.lambda.runtime" />
<allow pkg="com.amazonaws.xray" />

<allow pkg="com.formkiq.module.ocr" />
<allow pkg="software.amazon.awssdk" />

<allow pkg="com.formkiq.aws.dynamodb" />
Expand Down Expand Up @@ -79,8 +79,9 @@

<subpackage name="handler">

<allow pkg="com.formkiq.module.typesense" />
<allow pkg="com.formkiq.aws.services.lambda" />
<allow pkg="com.formkiq.module.typesense" />
<allow pkg="com.formkiq.module.ocr" />
<allow pkg="com.formkiq.stacks.api" />
<allow pkg="com.formkiq.stacks.common.formats" />
<allow pkg="com.formkiq.stacks.common.objects" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@
import com.formkiq.module.actions.services.ActionsServiceExtension;
import com.formkiq.module.lambdaservices.AwsServiceCache;
import com.formkiq.module.lambdaservices.ClassServiceExtension;
import com.formkiq.module.ocr.DocumentOcrService;
import com.formkiq.module.ocr.DocumentOcrServiceExtension;
import com.formkiq.plugins.tagschema.DocumentTagSchemaPlugin;
import com.formkiq.plugins.tagschema.DocumentTagSchemaPluginExtension;
import com.formkiq.stacks.api.handler.DocumentIdContentRequestHandler;
Expand Down Expand Up @@ -236,6 +238,7 @@ public static void configureHandler(final Map<String, String> map, final Region
AwsServiceCache.register(FolderIndexProcessor.class, new IndexProcessorExtension());
AwsServiceCache.register(ConfigService.class, new ConfigServiceExtension());
AwsServiceCache.register(DocumentSyncService.class, new DocumentSyncServiceExtension());
AwsServiceCache.register(DocumentOcrService.class, new DocumentOcrServiceExtension());

awsServices = new CoreAwsServiceCache().environment(map).debug("true".equals(map.get("DEBUG")));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import com.amazonaws.services.lambda.runtime.LambdaLogger;
import com.formkiq.aws.dynamodb.DynamoDbConnectionBuilder;
import com.formkiq.aws.dynamodb.model.DocumentItem;
import com.formkiq.aws.dynamodb.objects.MimeType;
import com.formkiq.aws.s3.PresignGetUrlConfig;
import com.formkiq.aws.s3.S3Service;
import com.formkiq.aws.services.lambda.ApiAuthorizer;
Expand All @@ -42,7 +43,6 @@
import com.formkiq.aws.services.lambda.ApiResponse;
import com.formkiq.aws.services.lambda.exceptions.NotFoundException;
import com.formkiq.module.lambdaservices.AwsServiceCache;
import com.formkiq.stacks.common.formats.MimeType;
import com.formkiq.stacks.dynamodb.DocumentService;
import com.formkiq.stacks.dynamodb.DocumentVersionService;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import com.formkiq.aws.dynamodb.model.DocumentMetadata;
import com.formkiq.aws.dynamodb.model.DocumentTag;
import com.formkiq.aws.dynamodb.model.DynamicDocumentItem;
import com.formkiq.aws.dynamodb.objects.DateUtil;
import com.formkiq.aws.s3.S3ObjectMetadata;
import com.formkiq.aws.s3.S3Service;
import com.formkiq.aws.services.lambda.ApiAuthorizer;
Expand All @@ -66,7 +67,6 @@
import com.formkiq.aws.services.lambda.services.CacheService;
import com.formkiq.module.lambdaservices.AwsServiceCache;
import com.formkiq.plugins.tagschema.DocumentTagSchemaPlugin;
import com.formkiq.stacks.dynamodb.DateUtil;
import com.formkiq.stacks.dynamodb.DocumentCountService;
import com.formkiq.stacks.dynamodb.DocumentItemToDynamicDocumentItem;
import com.formkiq.stacks.dynamodb.DocumentService;
Expand Down
Loading

0 comments on commit f13f65c

Please sign in to comment.