-
Notifications
You must be signed in to change notification settings - Fork 969
/
utils.py
259 lines (197 loc) · 7.59 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
# Mobile Verification Toolkit (MVT)
# Copyright (c) 2021-2023 The MVT Authors.
# Use of this software is governed by the MVT License 1.1 that can be found at
# https://license.mvt.re/1.1/
import cProfile
import datetime
import hashlib
import json
import logging
import os
import re
from typing import Any, Iterator, Union
from rich.logging import RichHandler
class CustomJSONEncoder(json.JSONEncoder):
    """
    JSON encoder that tolerates values the standard encoder rejects.

    Some modules keep raw, possibly non-UTF-8, bytes in their results
    dictionaries, which would otherwise raise while the results are being
    serialised to JSON.

    The trade-off is that when MVT is run via `check-iocs` against existing
    results, the encoded form is what gets loaded back into the dictionary.
    Modules should therefore store anything that has to be compared against
    an indicator in a JSON-friendly type.
    """

    def default(self, o):
        """Return a JSON-serialisable stand-in for `o`.

        :param o: Object the base encoder could not serialise.
        :returns: Decoded text for bytes, `str(o)` for everything else.
        :rtype: str
        """
        if not isinstance(o, bytes):
            # Every other type falls back to its string representation.
            return str(o)

        # Invalid UTF-8 sequences are preserved as backslash-escaped hex.
        return o.decode("utf-8", errors="backslashreplace")
def convert_chrometime_to_datetime(timestamp: int) -> datetime.datetime:
    """Turn a Chrome timestamp into a datetime.

    The value is treated as a number of microseconds counted from
    1601-01-01 00:00:00.

    :param timestamp: Chrome timestamp as int.
    :type timestamp: int
    :returns: Naive datetime corresponding to the timestamp.
    :rtype: datetime.datetime
    """
    elapsed = datetime.timedelta(microseconds=timestamp)
    return datetime.datetime(year=1601, month=1, day=1) + elapsed
def convert_datetime_to_iso(date_time: datetime.datetime) -> str:
    """Format a datetime as an ISO-like string.

    :param date_time: datetime to format.
    :type date_time: datetime.datetime
    :returns: String in YYYY-mm-dd HH:MM:SS.ms format, or an empty string
        when the value cannot be formatted (for example when it is None).
    :rtype: str
    """
    iso_format = "%Y-%m-%d %H:%M:%S.%f"
    try:
        formatted = date_time.strftime(iso_format)
    except Exception:
        # Best effort: callers receive "" instead of an exception.
        return ""
    return formatted
def convert_unix_to_utc_datetime(
    timestamp: Union[int, float, str]
) -> datetime.datetime:
    """Converts a unix epoch timestamp to a UTC datetime.

    :param timestamp: Epoch timestamp to convert. Strings are accepted as
        long as `float()` can parse them.
    :type timestamp: int, float or str
    :returns: Naive datetime (tzinfo is None) expressed in UTC.
    :rtype: datetime.datetime
    :raises ValueError: If the timestamp cannot be parsed as a float.
    :raises TypeError: If the timestamp is of an unsupported type (e.g. None).
    :raises OverflowError: If the timestamp is outside the supported range
        (the platform may raise OSError instead).
    """
    # datetime.utcfromtimestamp() is deprecated since Python 3.12. Build a
    # timezone-aware UTC value instead and strip the tzinfo afterwards, so
    # callers keep receiving the same naive datetime as before.
    aware = datetime.datetime.fromtimestamp(
        float(timestamp), tz=datetime.timezone.utc
    )
    return aware.replace(tzinfo=None)
def convert_unix_to_iso(timestamp: Union[int, float, str]) -> str:
    """Format a unix epoch timestamp as an ISO-like string.

    :param timestamp: Epoch timestamp to convert.
    :type timestamp: int, float or str
    :returns: String in YYYY-mm-dd HH:MM:SS.ms format, or an empty string
        when the conversion fails.
    :rtype: str
    """
    try:
        utc_datetime = convert_unix_to_utc_datetime(timestamp)
        return convert_datetime_to_iso(utc_datetime)
    except Exception:
        # Best effort: any conversion error results in an empty string.
        return ""
def convert_mactime_to_datetime(timestamp: Union[int, float], from_2001: bool = True):
    """Convert a Mac Standard Time timestamp to a datetime.

    :param timestamp: MacTime timestamp (either int or float).
    :type timestamp: int or float
    :param from_2001: Whether the timestamp counts from 2001-01-01 and so
        needs shifting to the unix epoch before conversion
        (Default value = True)
    :type from_2001: bool
    :returns: The converted datetime, or None when the timestamp is falsy
        or cannot be converted.
    """
    if not timestamp:
        return None

    seconds = timestamp

    # Some sources (for example the SMS messages database) store 18-digit
    # integers; only the first nine digits are kept.
    # NOTE(review): presumably these are nanosecond-resolution values - confirm.
    if isinstance(seconds, int):
        digits = str(seconds)
        if len(digits) == 18:
            seconds = int(digits[:9])

    # MacTime counts from 2001-01-01; 978307200 is the number of seconds
    # between 1970-01-01 and 2001-01-01.
    if from_2001:
        seconds = seconds + 978307200

    # TODO: This is rather ugly. Happens sometimes with invalid timestamps.
    try:
        return convert_unix_to_utc_datetime(seconds)
    except Exception:
        return None
def convert_mactime_to_iso(timestamp: int, from_2001: bool = True):
    """Convert a MacTime timestamp straight to an ISO-like string.

    Chains the MacTime-to-datetime conversion with the datetime-to-ISO
    formatting.

    :param timestamp: MacTime timestamp (either int or float).
    :type timestamp: int
    :param from_2001: Forwarded to the MacTime-to-datetime conversion
        (Default value = True)
    :type from_2001: bool
    :returns: ISO timestamp string in YYYY-mm-dd HH:MM:SS.ms format.
    :rtype: str
    """
    converted = convert_mactime_to_datetime(timestamp, from_2001)
    return convert_datetime_to_iso(converted)
def check_for_links(text: str) -> list:
    """Extract the HTTP(S) links contained in a text.

    A link starts at `http://` or `https://` (matched case-insensitively)
    and runs up to the next whitespace character.

    :param text: Any provided text.
    :type text: str
    :returns: List of the links found, in order of appearance.
    :rtype: list
    """
    url_pattern = re.compile(r"(?P<url>https?://[^\s]+)", re.IGNORECASE)
    return [match.group("url") for match in url_pattern.finditer(text)]
# Note: taken from here:
# https://stackoverflow.com/questions/57014259/json-dumps-on-dictionary-with-bytes-for-keys
def keys_bytes_to_string(obj: Any) -> Any:
    """Recursively convert bytes dictionary keys to strings.

    Dictionaries are rebuilt with every bytes key decoded to str. Tuples,
    lists and sets are walked as well and are always returned as lists.
    Any other object is returned unchanged.

    :param obj: Object to convert from bytes to string.
    :returns: A new dict (for dict input), a new list (for tuple, list or
        set input), or the original object otherwise.
    """
    if isinstance(obj, dict):
        converted = {}
        for key, value in obj.items():
            if isinstance(key, bytes):
                # Keys may hold non-UTF-8 bytes; escape invalid sequences
                # instead of raising UnicodeDecodeError, the same way
                # CustomJSONEncoder treats bytes values.
                key = key.decode("utf-8", errors="backslashreplace")
            converted[key] = keys_bytes_to_string(value)
        return converted

    if isinstance(obj, (tuple, list, set)):
        return [keys_bytes_to_string(item) for item in obj]

    return obj
def get_sha256_from_file_path(file_path: str) -> str:
    """Compute the SHA256 digest of the file at the given path.

    The file is read in 4096-byte chunks so it is never loaded into memory
    in one piece.

    :param file_path: Path to the file to hash
    :type file_path: str
    :returns: The SHA256 hex digest, or an empty string if the file could
        not be opened or read (OSError).
    :rtype: str
    """
    digest = hashlib.sha256()
    try:
        with open(file_path, "rb") as stream:
            while True:
                chunk = stream.read(4096)
                if chunk == b"":
                    break
                digest.update(chunk)
    except OSError:
        return ""
    return digest.hexdigest()
def generate_hashes_from_path(path: str, log) -> Iterator[dict]:
    """
    Generates SHA256 hashes of all files at the given path.

    If the path is a single file, one entry is yielded for it. If it is a
    folder, the tree is walked and one entry is yielded per file that could
    be hashed; files that cannot be hashed are logged and skipped. Nothing
    is yielded when the path is neither a file nor a folder.

    :param path: Path of the given folder or file
    :type path: str
    :param log: Logger used to report files that could not be hashed
    :returns: generator of dict {"file_path", "sha256"}
    """
    if os.path.isfile(path):
        yield {"file_path": path, "sha256": get_sha256_from_file_path(path)}
        return

    if not os.path.isdir(path):
        return

    for root, _, files in os.walk(path):
        for file_name in files:
            file_path = os.path.join(root, file_name)
            sha256 = get_sha256_from_file_path(file_path)

            # get_sha256_from_file_path() swallows OSError (which includes
            # FileNotFoundError and PermissionError) and returns "" instead
            # of raising, so a failure has to be detected from the empty
            # result rather than with an except clause.
            if not sha256:
                log.error(
                    "Failed to hash the file %s: might be a symlink "
                    "or permission denied",
                    file_path,
                )
                continue

            yield {"file_path": file_path, "sha256": sha256}
def init_logging(verbose: bool = False):
    """
    Initialise logging for the MVT module.

    The "mvt" logger is set to DEBUG and a Rich console handler is attached
    to it. The handler's own level decides what is shown on the console.

    :param verbose: Show DEBUG messages on the console when True, otherwise
        only INFO and above (Default value = False)
    :type verbose: bool
    """
    # Setup logging using Rich.
    mvt_logger = logging.getLogger("mvt")
    mvt_logger.setLevel(logging.DEBUG)

    rich_handler = RichHandler(show_path=False, log_time_format="%X")
    rich_handler.setFormatter(logging.Formatter("[%(name)s] %(message)s"))
    rich_handler.setLevel(logging.DEBUG if verbose else logging.INFO)

    mvt_logger.addHandler(rich_handler)
def set_verbose_logging(verbose: bool = False):
    """
    Switch the first handler of the "mvt" logger between DEBUG and INFO.

    :param verbose: Use DEBUG when True, INFO otherwise
        (Default value = False)
    :type verbose: bool
    :raises IndexError: If the "mvt" logger has no handler attached.
    """
    level = logging.DEBUG if verbose else logging.INFO
    logging.getLogger("mvt").handlers[0].setLevel(level)
def exec_or_profile(module, globals, locals):
    """Hook for profiling MVT modules.

    Runs `module` under cProfile when the MVT_PROFILE environment variable
    holds a non-zero integer, and with a plain `exec()` otherwise.

    :param module: Code to execute, in any form accepted by `exec()`.
    :param globals: Globals dictionary the code is executed with.
    :param locals: Locals mapping the code is executed with.
    """
    # An unset, empty or non-numeric MVT_PROFILE (e.g. "true") must not
    # crash the run with a ValueError: treat it as "profiling disabled".
    try:
        profiling_enabled = bool(int(os.environ.get("MVT_PROFILE", "0")))
    except ValueError:
        profiling_enabled = False

    # NOTE: both branches execute arbitrary code; `module` must only ever
    # come from trusted, internal sources.
    if profiling_enabled:
        cProfile.runctx(module, globals, locals)
    else:
        exec(module, globals, locals)