Skip to content

Commit

Permalink
Merging next to main for release 2.5.3 (#25)
Browse files Browse the repository at this point in the history
* [PAPP-33470] Bugfix: Recurrent multipart parsing (#20)

* recurrent multipart parsing

* Update README.md

* linting fix

* fix for "in_attachment" traversal

* ph_status -> ret_val

* applying suggestions

---------

Co-authored-by: splunk-soar-connectors-admin <admin@splunksoar>

* Bumped up the version of googleworkspaceforgmail from 2.5.2 to 2.5.3

* proper release notes, function naming convention (#24)

* Release notes for version 2.5.3

---------

Co-authored-by: mposluszny-splunk <150343546+mposluszny-splunk@users.noreply.github.com>
Co-authored-by: splunk-soar-connectors-admin <admin@splunksoar>
Co-authored-by: root <root@splunksoar>
  • Loading branch information
4 people committed Apr 8, 2024
1 parent 6134570 commit ee34a49
Show file tree
Hide file tree
Showing 4 changed files with 164 additions and 74 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# G Suite for GMail

Publisher: Splunk
Connector Version: 2.5.2
Connector Version: 2.5.3
Product Vendor: Google
Product Name: GMail
Product Version Supported (regex): ".\*"
Expand Down Expand Up @@ -295,6 +295,7 @@ PARAMETER | REQUIRED | DESCRIPTION | TYPE | CONTAINS
**email** | required | User's Email (Mailbox to search) | string | `email`
**internet_message_id** | required | Internet Message ID | string | `internet message id`
**extract_attachments** | optional | Add attachments to vault and create vault artifacts | boolean |
**extract_nested** | optional | Works when `extract_attachments` is set to `true`. Extracts attachments from nested email attachments. | boolean |

#### Action Output
DATA PATH | TYPE | CONTAINS | EXAMPLE VALUES
Expand Down
12 changes: 9 additions & 3 deletions gsgmail.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
"latest_tested_versions": [
"Cloud, May 26, 2023"
],
"app_version": "2.5.2",
"app_version": "2.5.3",
"product_version_regex": ".*",
"license": "Copyright (c) 2017-2024 Splunk Inc.",
"utctime_updated": "2024-03-18T08:57:36.000000Z",
"utctime_updated": "2024-04-05T12:12:46.000000Z",
"configuration": {
"login_email": {
"required": true,
Expand Down Expand Up @@ -866,6 +866,12 @@
"data_type": "boolean",
"default": false,
"order": 2
},
"extract_nested": {
"description": "Works when `extract_attachments` is set to `true`. Extracts attachments from nested email attachments.",
"data_type": "boolean",
"default": false,
"order": 3
}
},
"output": [
Expand Down Expand Up @@ -1281,4 +1287,4 @@
}
]
}
}
}
219 changes: 149 additions & 70 deletions gsgmail_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,75 +252,6 @@ def _get_email_headers_from_part(self, part):

return dict(headers)

def _parse_multipart_msg(self, action_result, msg, email_details, extract_attachments=False):
    """Walk every part of a multipart message and fill in ``email_details``.

    For each part yielded by ``msg.walk()`` this records the part headers,
    copies non-empty ``to``/``from``/``subject`` headers to the top level of
    ``email_details``, collects inline plain-text and HTML bodies and, when
    ``extract_attachments`` is set, stores named parts in the vault and saves
    an artifact for each successfully vaulted file.

    :param action_result: object whose ``set_status`` is used to report failures
    :param msg: parsed email message to walk
    :param email_details: dictionary updated in place with headers and bodies
    :param extract_attachments: when true, named parts are added to the vault
    :return: ``phantom.APP_SUCCESS``, or the value returned by
        ``action_result.set_status`` when vaulting or artifact saving fails
    """
    text_parts = []
    markup_parts = []
    container_id = self.get_container_id()

    email_details['email_headers'] = []
    for part in msg.walk():
        content_type = part.get_content_type()
        headers = self._get_email_headers_from_part(part)

        # Promote the headers used for output table rendering to top-level fields
        for field in ('to', 'from', 'subject'):
            value = headers.get(field)
            if value:
                email_details[field] = value

        disposition = str(part.get('Content-Disposition'))
        file_name = part.get_filename()
        inline = 'attachment' not in disposition

        # Plain-text parts are body text unless they are marked as attachments
        if inline and content_type == 'text/plain':
            codec = part.get_content_charset() or 'utf8'
            # Decode the transfer-encoded bytes into text and add them to the plain text bodies
            text_parts.append(part.get_payload(decode=True).decode(encoding=codec, errors="ignore"))

        if inline and content_type == 'text/html':
            codec = part.get_content_charset() or 'utf8'
            # Decode the transfer-encoded bytes into text and add them to the html bodies
            markup_parts.append(part.get_payload(decode=True).decode(encoding=codec, errors="ignore"))
        elif file_name and extract_attachments:
            vault_response = None
            try:
                # message/* parts are stored as the text of their first sub-message
                if content_type.startswith("message/"):
                    payload = part.get_payload(0).as_string()
                else:
                    payload = part.get_payload(decode=True)
                # Create vault item with attachment payload
                vault_response = Vault.create_attachment(payload, container_id=container_id, file_name=file_name)
            except Exception as e:
                message = self._get_error_message_from_exception(e)
                return action_result.set_status(phantom.APP_ERROR, f"Unable to add attachment: {file_name} Error: {message}")

            # A response without a truthy 'succeeded' flag is skipped without reporting an error
            if vault_response.get('succeeded'):
                file_hash = vault_response[phantom.APP_JSON_HASH]
                # Create vault artifact
                artifact = {
                    'name': 'Email Attachment Artifact',
                    'container_id': container_id,
                    'cef': {
                        'vaultId': file_hash,
                        'fileHash': file_hash,
                        'file_hash': file_hash,
                        'fileName': file_name
                    },
                    'run_automation': False,
                    'source_data_identifier': None
                }
                status, save_message, _ = self.save_artifact(artifact)
                if phantom.is_fail(status):
                    return action_result.set_status(phantom.APP_ERROR, "Could not save artifact to container: {}".format(save_message))

        # Headers are recorded last, so a part that fails above is not listed
        email_details['email_headers'].append(headers)

    email_details['parsed_plain_body'] = '\n\n'.join(text_parts)
    email_details['parsed_html_body'] = '\n\n'.join(markup_parts)

    return phantom.APP_SUCCESS

def _handle_run_query(self, param):

# Implement the handler here, some basic code is already in
Expand Down Expand Up @@ -407,6 +338,148 @@ def _handle_run_query(self, param):

return action_result.set_status(phantom.APP_SUCCESS)

def _body_from_part(self, part):
charset = part.get_content_charset() or "utf-8"
# decode the base64 unicode bytestring into plain text
return part.get_payload(decode=True).decode(
encoding=charset, errors="ignore"
)

def _create_artifact(self, file_name, attach_resp):
    """Build the artifact dictionary describing one vaulted email attachment.

    :param file_name: name of the attached file, stored as ``fileName``
    :param attach_resp: vault response; its ``phantom.APP_JSON_HASH`` entry
        is stored under the ``vaultId``, ``fileHash`` and ``file_hash`` keys
    :return: artifact dictionary bound to the current container
    """
    artifact = {
        "name": "Email Attachment Artifact",
        "container_id": self.get_container_id(),
    }

    # The same vault hash is published under every CEF key
    vault_hash = attach_resp[phantom.APP_JSON_HASH]
    artifact["cef"] = {
        "vaultId": vault_hash,
        "fileHash": vault_hash,
        "file_hash": vault_hash,
        "fileName": file_name,
    }

    artifact["run_automation"] = False
    artifact["source_data_identifier"] = None
    return artifact

def _parse_email_details(self, part, email_details):
    """Record the headers and, for text parts, the body of ``part``.

    Non-empty ``to``, ``from`` and ``subject`` headers overwrite the
    matching top-level keys of ``email_details``. ``text/plain`` and
    ``text/html`` bodies are appended to the ``plain_bodies`` and
    ``html_bodies`` lists, and the full header dictionary is appended to
    ``email_headers``; those three lists must already exist.

    :param part: message part to inspect
    :param email_details: dictionary updated in place
    """
    headers = self._get_email_headers_from_part(part)

    # Split out important headers (for output table rendering)
    for field in ("to", "from", "subject"):
        if headers.get(field):
            email_details[field] = headers[field]

    # Map the content types treated as bodies to the list collecting them
    body_lists = {"text/plain": "plain_bodies", "text/html": "html_bodies"}
    target = body_lists.get(part.get_content_type())
    if target is not None:
        email_details[target].append(self._body_from_part(part))

    email_details["email_headers"].append(headers)

def _get_payload_content(self, part):
if part.get_content_type().startswith("message/"):
return part.get_payload(0).as_string()
return part.get_payload(decode=True)

def _extract_attachment(self, part, action_result):
    """Store ``part`` in the vault and save an artifact pointing at it.

    :param part: message part whose content is vaulted under its filename
    :param action_result: object whose ``set_status`` is used to report failures
    :return: ``phantom.APP_SUCCESS``, or the value returned by
        ``action_result.set_status`` when vaulting raises or the artifact
        cannot be saved
    """
    file_name = part.get_filename()

    try:
        # Create vault item with attachment payload
        vault_response = Vault.create_attachment(
            self._get_payload_content(part),
            container_id=self.get_container_id(),
            file_name=file_name,
        )
    except Exception as e:
        return action_result.set_status(
            phantom.APP_ERROR,
            f"Unable to add attachment: {file_name} Error: {self._get_error_message_from_exception(e)}",
        )

    # A response without a truthy "succeeded" flag is not reported as an
    # error: no artifact is created and the call still returns success.
    if not vault_response.get("succeeded"):
        return phantom.APP_SUCCESS

    # Create vault artifact
    artifact = self._create_artifact(file_name, vault_response)
    status, save_message, _ = self.save_artifact(artifact)
    if phantom.is_fail(status):
        return action_result.set_status(
            phantom.APP_ERROR,
            f"Could not save artifact to container: {save_message}",
        )
    return phantom.APP_SUCCESS

@staticmethod
def _is_attachment(part):
return "attachment" in str(part.get("Content-Disposition"))

def _init_detail_fields(self, email_details):
email_details["plain_bodies"] = []
email_details["html_bodies"] = []
email_details["email_headers"] = []

def _join_email_bodies(self, email_details):
email_details["parsed_plain_body"] = "\n\n".join(
email_details.pop("plain_bodies")
)
email_details["parsed_html_body"] = "\n\n".join(
email_details.pop("html_bodies")
)

def __recursive_part_traverse(
    self,
    part,
    email_details,
    action_result,
    extract_attachments=False,
    extract_nested=False,
    in_attachment=False,
):
    """Visit ``part`` and its sub-parts, gathering details and attachments.

    Email details are collected only from parts that are neither
    attachments themselves nor located under an attachment. Attachment
    parts are vaulted when ``extract_attachments`` is set, and their
    sub-parts are visited only when ``extract_nested`` is set.

    :param part: message part to process
    :param email_details: dictionary updated in place with parsed details
    :param action_result: object used to report failures
    :param extract_attachments: vault parts that are attachments
    :param extract_nested: descend into attachment parts
    :param in_attachment: true when an ancestor of ``part`` is an attachment
    :return: status of the traversal; the first falsy status stops the
        processing of remaining sub-parts and is returned
    """
    is_attachment = self._is_attachment(part)
    # We assume that everything that is under attachment is also an attachment
    under_attachment = is_attachment or in_attachment

    # We are only gathering email data from top email, any attachment email should be omitted
    if not under_attachment:
        self._parse_email_details(part, email_details)

    ret_val = phantom.APP_SUCCESS

    if is_attachment:
        if extract_attachments:
            ret_val = self._extract_attachment(part, action_result)
            if phantom.is_fail(ret_val):
                return ret_val
        if not extract_nested:
            return ret_val

    if not part.is_multipart():
        return ret_val

    for subpart in part.get_payload():
        # Once a status is falsy, no further sub-part is processed
        if not ret_val:
            break
        ret_val = self.__recursive_part_traverse(
            subpart,
            email_details,
            action_result,
            extract_attachments=extract_attachments,
            extract_nested=extract_nested,
            in_attachment=under_attachment,
        )
    return ret_val

def _parse_multipart_message(
    self,
    action_result,
    msg,
    email_details,
    extract_attachments=False,
    extract_nested=False,
):
    """Parse a multipart message into ``email_details``.

    Initialises the working lists, traverses the message tree starting at
    ``msg`` and finally joins the collected bodies into single strings.

    :param action_result: object used to report failures
    :param msg: root message to parse
    :param email_details: dictionary updated in place
    :param extract_attachments: vault parts that are attachments
    :param extract_nested: also descend into attachment parts
    :return: status produced by the traversal
    """
    self._init_detail_fields(email_details)

    status = self.__recursive_part_traverse(
        msg,
        email_details,
        action_result,
        extract_attachments=extract_attachments,
        extract_nested=extract_nested,
    )

    # Bodies are joined whatever status the traversal returned
    self._join_email_bodies(email_details)
    return status

def _handle_get_email(self, param):

self.save_progress("In action handler for: {0}".format(self.get_action_identifier()))
Expand Down Expand Up @@ -449,7 +522,13 @@ def _handle_get_email(self, param):
msg = email.message_from_bytes(raw_encoded)

if msg.is_multipart():
ret_val = self._parse_multipart_msg(action_result, msg, email_details_resp, param.get('extract_attachments', False))
ret_val = self._parse_multipart_message(
action_result,
msg,
email_details_resp,
param.get("extract_attachments", False),
param.get("extract_nested", False),
)

if phantom.is_fail(ret_val):
return action_result.get_status()
Expand Down
4 changes: 4 additions & 0 deletions release_notes/2.5.3.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
* [PAPP-33478] Multipart message parsing improvement.
* Implemented parsing nested attachments.
* Fixed email attachments overriding main email metadata.
* Added the `extract_nested` action parameter, which creates artifacts from attachments found inside nested email attachments. Works only when `extract_attachments` is set to `true`.

0 comments on commit ee34a49

Please sign in to comment.