Skip to content

Commit

Permalink
#149 - adjusting chatgpt response
Browse files Browse the repository at this point in the history
  • Loading branch information
mfriesen committed Jul 13, 2023
1 parent 67e3117 commit aeeb1d2
Show file tree
Hide file tree
Showing 5 changed files with 156 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,14 @@ public static String getFilename(final String path) {
public static boolean isEmpty(final CharSequence cs) {
  // A missing sequence is treated the same as one with no characters.
  if (cs == null) {
    return true;
  }
  return cs.length() == 0;
}

/** Matches a single or double quote located at the very start or very end of the input. */
private static final java.util.regex.Pattern EDGE_QUOTES =
    java.util.regex.Pattern.compile("^['\"]|['\"]$");

/**
 * Remove one leading and/or one trailing single/double quote from {@link String}.
 *
 * <p>
 * Only the outermost character on each side is considered; quotes inside the text are kept. The
 * leading and trailing quote do not need to be of the same kind.
 *
 * @param s {@link String}, may be {@code null}
 * @return {@link String} without the edge quotes, or {@code null} when {@code s} is {@code null}
 */
public static String removeQuotes(final String s) {
  // Be null tolerant instead of failing with a NullPointerException.
  if (s == null) {
    return null;
  }
  return EDGE_QUOTES.matcher(s).replaceAll("");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,13 @@ void testFilename() {
assertEquals("test (something).txt",
Strings.getFilename("/bleh/something/test (something).txt"));
}

@Test
void replaceQuotes() {
  // Unquoted, double quoted, leading quote only, single quoted and mismatched quotes
  // must all collapse to the same bare text.
  final String[] inputs = {"text", "\"text\"", "\"text", "'text'", "\"text'"};

  for (final String input : inputs) {
    assertEquals("text", Strings.removeQuotes(input));
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
*/
package com.formkiq.stacks.lambda.s3;

import static com.formkiq.aws.dynamodb.objects.Strings.removeQuotes;
import static com.formkiq.module.http.HttpResponseStatus.is2XX;
import static com.formkiq.stacks.dynamodb.ConfigService.CHATGPT_API_KEY;
import java.io.IOException;
Expand All @@ -36,8 +37,8 @@
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.stream.Collectors;
import java.util.Optional;
import java.util.stream.Collectors;
import com.amazonaws.services.lambda.runtime.LambdaLogger;
import com.formkiq.aws.dynamodb.DynamicObject;
import com.formkiq.aws.dynamodb.model.DocumentItem;
Expand Down Expand Up @@ -97,6 +98,22 @@ public DocumentTaggingAction(final AwsServiceCache services) {
this.documentService = services.getExtension(DocumentService.class);
}

/**
 * Adjust Tag Key.
 *
 * <p>
 * Compares the key against each requested tag after reducing both to lowercase ASCII letters and
 * digits. When a requested tag matches, that tag is returned as written in {@code tags};
 * otherwise the key is returned with its edge quotes removed.
 *
 * @param tags {@link List} {@link String}
 * @param key {@link String}
 * @return {@link String}
 */
private String adjustKeyFromTags(final List<String> tags, final String key) {
  // Normalize the key once rather than once per candidate tag.
  final String normalizedKey = normalizeTagKey(key);

  return tags.stream().filter(t -> normalizeTagKey(t).equals(normalizedKey)).findAny()
      .orElse(removeQuotes(key));
}

/**
 * Reduce a tag name to its lowercase ASCII letters and digits so names can be compared.
 *
 * @param s {@link String}
 * @return {@link String}
 */
private static String normalizeTagKey(final String s) {
  // Locale.ROOT keeps the lowercasing independent of the JVM default locale; with a Turkish
  // default, "I" would become a dotless i and then be stripped by the ASCII-only filter.
  return s.toLowerCase(java.util.Locale.ROOT).replaceAll("[^A-Za-z0-9]", "");
}

private String createChatGptPrompt(final String siteId, final String documentId,
final Action action) throws IOException {

Expand Down Expand Up @@ -220,7 +237,29 @@ private Map<String, Object> parseGptText(final List<String> tags, final String t
data.remove(e.getKey());
}

return data;
return data.entrySet().stream().filter(d -> d.getKey() != null && d.getValue() != null)
.collect(Collectors.toMap(d -> adjustKeyFromTags(tags, d.getKey()),
d -> removeQuotesFromObject(d.getValue())));
}

/**
 * Remove edge quotes from a value.
 *
 * <p>
 * A {@link String} has its quotes removed. A {@link Collection} is copied into a new list that
 * holds only its {@link String} elements, each with quotes removed. Any other value is returned
 * unchanged.
 *
 * @param o {@link Object}
 * @return {@link Object}
 */
private Object removeQuotesFromObject(final Object o) {
  if (o instanceof String) {
    return removeQuotes((String) o);
  }

  if (!(o instanceof Collection)) {
    return o;
  }

  // Only String elements are carried over; everything else is left out of the result.
  return ((Collection<?>) o).stream().filter(String.class::isInstance)
      .map(element -> removeQuotes((String) element))
      .collect(Collectors.toCollection(ArrayList::new));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ private static void createMockServer() throws IOException {

final int status = 200;

for (String item : Arrays.asList("1", "2", "3")) {
for (String item : Arrays.asList("1", "2", "3", "4")) {
String text = FileUtils.loadFile(mockServer, "/chatgpt/response" + item + ".json");
mockServer.when(request().withMethod("POST").withPath("/chatgpt" + item)).respond(
org.mockserver.model.HttpResponse.response(text).withStatusCode(Integer.valueOf(status)));
Expand Down Expand Up @@ -305,20 +305,20 @@ public void testDocumentTaggingAction02() throws Exception {
assertEquals(expectedSize, tags.getResults().size());

int i = 0;
assertEquals("Document Type", tags.getResults().get(i).getKey());
assertEquals("document type", tags.getResults().get(i).getKey());
assertEquals("Memorandum", tags.getResults().get(i++).getValue());

assertEquals("Location", tags.getResults().get(i).getKey());
assertEquals("location", tags.getResults().get(i).getKey());
assertEquals("YellowBelly Brewery Pub, St. Johns, NL", tags.getResults().get(i++).getValue());

assertEquals("Organization", tags.getResults().get(i).getKey());
assertEquals("organization", tags.getResults().get(i).getKey());
assertEquals("Great Auk Enterprises", tags.getResults().get(i++).getValue());

assertEquals("Person", tags.getResults().get(i).getKey());
assertEquals("person", tags.getResults().get(i).getKey());
assertEquals("Thomas Bewick,Ketill Ketilsson,Farley Mowat,Aaron Thomas",
String.join(",", tags.getResults().get(i++).getValues()));

assertEquals("Subject", tags.getResults().get(i).getKey());
assertEquals("subject", tags.getResults().get(i).getKey());
assertEquals("MINUTES OF A MEETING OF DIRECTORS", tags.getResults().get(i++).getValue());
}
}
Expand Down Expand Up @@ -491,6 +491,74 @@ public void testDocumentTaggingAction05() throws Exception {
}
}

/**
 * Handle documentTagging ChatGPT Action when the response contains extra quotes.
 *
 * <p>
 * The processor is initialized with "chatgpt4"; the mock server answers "/chatgpt4" with
 * response4.json, whose keys are capitalized ("Document_Type" uses an underscore) and whose
 * organization value is wrapped in single quotes. The assertions expect the keys to come back
 * spelled like the requested tags and the values to come back without the extra quotes.
 *
 * @throws Exception Exception
 */
@Test
public void testDocumentTaggingAction06() throws Exception {

initProcessor("fulltext", "chatgpt4");

// Run once without a siteId and once with a random siteId.
for (String siteId : Arrays.asList(null, UUID.randomUUID().toString())) {
// given
configService.save(siteId, new DynamicObject(Map.of(CHATGPT_API_KEY, "asd")));

String documentId = UUID.randomUUID().toString();

DocumentItem item = new DocumentItemDynamoDb(documentId, new Date(), "joe");
item.setContentType("text/plain");

// Store plain text content for the document under its S3 key.
String s3Key = SiteIdKeyGenerator.createS3Key(siteId, documentId);
String content = "this is some data";
s3Service.putObject(BUCKET_NAME, s3Key, content.getBytes(StandardCharsets.UTF_8),
"text/plain");

documentService.saveDocument(siteId, item, null);
// Start with a system defined "untagged" tag on the document.
documentService.addTags(siteId, documentId, Arrays.asList(new DocumentTag(documentId,
"untagged", "", new Date(), "joe", DocumentTagType.SYSTEMDEFINED)), null);

// Requested tags are lowercase and "document type" contains a space.
List<Action> actions =
Arrays.asList(new Action().type(ActionType.DOCUMENTTAGGING).parameters(Map.of("engine",
"chatgpt", "tags", "organization,location,person,subject,sentiment,document type")));
actionsService.saveActions(siteId, documentId, actions);

Map<String, Object> map =
loadFileAsMap(this, "/actions-event01.json", "c2695f67-d95e-4db0-985e-574168b12e57",
documentId, "default", siteId != null ? siteId : "default");

// when
processor.handleRequest(map, this.context);

// then
// Six tags were requested but five are expected; "Sentiment" is an empty list in
// response4.json (presumably why it produces no tag - confirm against the processor).
final int expectedSize = 5;
assertEquals(ActionStatus.COMPLETE,
actionsService.getActions(siteId, documentId).get(0).status());

PaginationResults<DocumentTag> tags =
documentService.findDocumentTags(siteId, documentId, null, MAX_RESULTS);
assertEquals(expectedSize, tags.getResults().size());

// The assertions below rely on the results being ordered by tag key.
int i = 0;
// "Document_Type" in the response is expected back as the requested "document type".
assertEquals("document type", tags.getResults().get(i).getKey());
assertEquals("Memorandum", tags.getResults().get(i++).getValue());

assertEquals("location", tags.getResults().get(i).getKey());
assertEquals("YellowBelly Brewery Pub, St. Johns, NL", tags.getResults().get(i++).getValue());

// The response value is 'Great Auk Enterprises' including the single quotes.
assertEquals("organization", tags.getResults().get(i).getKey());
assertEquals("Great Auk Enterprises", tags.getResults().get(i++).getValue());

// Multi-valued tag: compared as a comma joined list.
assertEquals("person", tags.getResults().get(i).getKey());
assertEquals("Thomas Bewick,Ketill Ketilsson,Farley Mowat,Aaron Thomas",
String.join(",", tags.getResults().get(i++).getValues()));

assertEquals("subject", tags.getResults().get(i).getKey());
assertEquals("MINUTES OF A MEETING OF DIRECTORS", tags.getResults().get(i++).getValue());
}
}

/**
* Test converting Ocr Parse Types.
*/
Expand Down
22 changes: 22 additions & 0 deletions lambda-s3/src/test/resources/chatgpt/response4.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"id": "cmpl-7HOmfBlUrif1MEDOLEFEtJ44A9iGq",
"object": "text_completion",
"created": 1684381201,
"model": "text-davinci-003",
"choices":
[
{
"text": "\n\n{\n \"Organization\": [\"'Great Auk Enterprises'\"],\n \"Location\": [\"YellowBelly Brewery Pub, St. Johns, NL\"],\n \"Person\": [\"Thomas Bewick\", \"Ketill Ketilsson\", \"Farley Mowat\", \"Aaron Thomas\"], \n \"Subject\": [\"MINUTES OF A MEETING OF DIRECTORS\"], \n \"Sentiment\": [], \n \"Document_Type\": [\"Memorandum\"] \n}",
"index": 0,
"logprobs": null,
"finish_reason": "stop"
}
],
"usage":
{
"prompt_tokens": 255,
"completion_tokens": 106,
"total_tokens": 361
}
}

0 comments on commit aeeb1d2

Please sign in to comment.