Skip to content

Commit

Permalink
feature: facebook messenger parser
Browse files Browse the repository at this point in the history
  • Loading branch information
marcus6n committed Jul 18, 2024
1 parent a5e60df commit ade5bff
Show file tree
Hide file tree
Showing 4 changed files with 175 additions and 2 deletions.
10 changes: 8 additions & 2 deletions iped-parsers/iped-parsers-impl/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
</parent>
<artifactId>iped-parsers-impl</artifactId>
<packaging>jar</packaging>

<dependencies>
<dependency>
<groupId>org.slf4j</groupId>
Expand Down Expand Up @@ -242,8 +242,14 @@
<artifactId>ofx4j</artifactId>
<version>1.36</version>
</dependency>
<!-- Gson JSON library (com.google.code.gson:gson 2.9.0). "compile" is Maven's default scope, so the explicit scope element below is informational only. -->
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.9.0</version>
<scope>compile</scope>
</dependency>
</dependencies>

<build>
<plugins>
<plugin>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package iped.parsers.facebook;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Set;

import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParseException;
import com.google.gson.JsonParser;

public class FacebookMessengerJSONParser implements Parser {

private static final long serialVersionUID = 1L;
private static final Set<MediaType> SUPPORTED_TYPES = MediaType.set(MediaType.application("json"));

@Override
public Set<MediaType> getSupportedTypes(ParseContext arg0) {
return SUPPORTED_TYPES;
}

@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {

XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
xhtml.startDocument();

try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))) {
JsonObject jsonObject = JsonParser.parseReader(reader).getAsJsonObject();
JsonArray messages = jsonObject.getAsJsonArray("messages");

for (int i = 0; i < messages.size(); i++) {
JsonObject message = messages.get(i).getAsJsonObject();
String sender = message.has("sender_name") ? message.get("sender_name").getAsString() : "Unknown";
String content = message.has("content") ? message.get("content").getAsString() : "No Content";
String timestamp = message.has("timestamp_ms") ? message.get("timestamp_ms").getAsString() : "No Timestamp";

xhtml.element("p", String.format("Sender: %s\nMessage: %s\nTimestamp: %s\n", sender, content, timestamp));
}
} finally {
xhtml.endDocument();
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package iped.parsers.facebook;

import java.io.InputStream;

import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.ToTextContentHandler;
import org.junit.Test;

import junit.framework.TestCase;

/**
 * Checks that {@link FacebookMessengerJSONParser} emits the sender, message text and
 * timestamp of messages contained in a sample export loaded from the test resources.
 */
public class FacebookMessengerJSONParserTest extends TestCase {

    /** Classpath location of the sample Messenger export. */
    private static final String SAMPLE_RESOURCE = "/test-files/test_facebookMessenger.json";

    /**
     * Parses the sample export into plain text and verifies that selected messages appear
     * in the output. Expected message strings mirror the escape sequences used in the
     * sample file verbatim.
     */
    @Test
    public void testParse() throws Exception {

        FacebookMessengerJSONParser parser = new FacebookMessengerJSONParser();
        ToTextContentHandler handler = new ToTextContentHandler();
        Metadata metadata = new Metadata();
        ParseContext context = new ParseContext();

        try (InputStream stream = getClass().getResourceAsStream(SAMPLE_RESOURCE)) {
            assertNotNull("Input stream should not be null", stream);
            parser.parse(stream, handler, metadata, context);

            String parsedContent = handler.toString().trim();

            // Check that the expected contents are present.
            assertContains(parsedContent, "Sender: Guilherme David");
            assertContains(parsedContent, "Message: Voc\u00c3\u00aa n\u00c3\u00a3o respondeu a: Boa tarde, deixe seu contato que o responsavel pelo imovel vai te passar todas informa\u00c3\u00a7\u00c3\u00b5es");
            assertContains(parsedContent, "Timestamp: 1718225887428");

            assertContains(parsedContent, "Message: Boa tarde, deixe seu contato que o responsavel pelo imovel vai te passar todas informa\u00c3\u00a7\u00c3\u00b5es");
            assertContains(parsedContent, "Timestamp: 1718220406722");

            assertContains(parsedContent, "Sender: Marcus De Oliveira");
            assertContains(parsedContent, "Message: Ol\u00c3\u00a1, esse item ainda est\u00c3\u00a1 dispon\u00c3\u00advel?");
            assertContains(parsedContent, "Timestamp: 1717920369981");
        }
    }

    /** Fails with a message naming the missing fragment when {@code text} lacks {@code expected}. */
    private static void assertContains(String text, String expected) {
        assertTrue("Expected parsed output to contain: " + expected, text.contains(expected));
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
{
"participants": [
{
"name": "Marcus De Oliveira"
},
{
"name": "Guilherme David"
}
],
"messages": [
{
"sender_name": "Guilherme David",
"timestamp_ms": 1718225887428,
"content": "Voc\u00c3\u00aa n\u00c3\u00a3o respondeu a: Boa tarde, deixe seu contato que o responsavel pelo imovel vai te passar todas informa\u00c3\u00a7\u00c3\u00b5es",
"is_geoblocked_for_viewer": false
},
{
"sender_name": "Guilherme David",
"timestamp_ms": 1718220406722,
"content": "Boa tarde, deixe seu contato que o responsavel pelo imovel vai te passar todas informa\u00c3\u00a7\u00c3\u00b5es",
"is_geoblocked_for_viewer": false
},
{
"sender_name": "Guilherme David",
"timestamp_ms": 1718220404074,
"content": "Sim, voc\u00c3\u00aa ainda tem interesse?",
"is_geoblocked_for_viewer": false
},
{
"sender_name": "Marcus De Oliveira",
"timestamp_ms": 1718049971976,
"content": "Classificados semelhantes a \"3 quartos 2 banheiros \u00e2\u0080\u0093 Casa\"",
"is_geoblocked_for_viewer": false
},
{
"sender_name": "Marcus De Oliveira",
"timestamp_ms": 1717920369981,
"content": "Ol\u00c3\u00a1, esse item ainda est\u00c3\u00a1 dispon\u00c3\u00advel?",
"is_geoblocked_for_viewer": false
},
{
"sender_name": "Marcus De Oliveira",
"timestamp_ms": 1717920367339,
"content": "Voc\u00c3\u00aa mudou a foto do grupo.",
"is_geoblocked_for_viewer": false
},
{
"sender_name": "Marcus De Oliveira",
"timestamp_ms": 1717920366902,
"content": "Voc\u00c3\u00aa deu o nome \"Marcus \u00c2\u00b7 3 quartos 2 banheiros \u00e2\u0080\u0093 Casa\" ao grupo.",
"is_geoblocked_for_viewer": false
}
],
"title": "Marcus \u00c2\u00b7 3 quartos 2 banheiros \u00e2\u0080\u0093 Casa",
"is_still_participant": true,
"thread_path": "inbox/marcus3quartos2banheiroscasa_7442173665892250",
"magic_words": [

],
"image": {
"uri": "your_facebook_activity/messages/photos/435917623_1194274758404788_8125811693636917451_n_844277654271524.jpg",
"creation_timestamp": 1717920365
},
"joinable_mode": {
"mode": 1,
"link": ""
}
}

0 comments on commit ade5bff

Please sign in to comment.