-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
89 lines (70 loc) · 2.47 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import csv
import datetime
from functools import cache
from pathlib import Path
from bs4 import BeautifulSoup
@cache
def today():
    """Return the current local calendar date.

    The result is memoised by ``functools.cache``, so every call made within
    one process returns the date computed by the first call, even if the run
    crosses midnight.
    """
    return datetime.date.today()
def parse(contents):
    """Yield one record per ``<li>`` found inside ``div#currentincidents``.

    Args:
        contents: HTML markup; it is parsed with BeautifulSoup's ``lxml``
            parser.

    Yields:
        dict: ``category`` holds the list item's text, while ``location``,
        ``lat`` and ``lon`` hold the item's ``location``, ``latitude`` and
        ``longitude`` attributes (``None`` when an attribute is absent).
    """
    document = BeautifulSoup(contents, "lxml")
    container = document.find("div", id="currentincidents")
    for entry in container.find_all("li"):
        attributes = entry.attrs
        yield dict(
            category=entry.text,
            location=attributes.get("location"),
            lat=attributes.get("latitude"),
            lon=attributes.get("longitude"),
        )
def load_previous_data():
    """Yield the rows already stored in today's CSV file, if it exists.

    The file is ``data/<year>/<date>.csv`` for the date returned by
    ``today()``. Nothing is yielded when that file is missing.

    Yields:
        dict: one mapping per CSV record, as produced by ``csv.DictReader``.
    """
    filename = Path(f"data/{today().year}/{str(today())}.csv")
    if not filename.exists():
        return
    # Open through a context manager so the handle is closed once the rows
    # have been consumed (the bare open() used before was never closed).
    # newline="" lets the csv module do its own line-ending handling.
    with open(filename, newline="") as csvfile:
        yield from csv.DictReader(csvfile)
def merge(previous_incidents, incidents):
    """Yield the incidents that are not already among the previous ones.

    Previous rows are first reduced to the keys of the first incident, which
    drops extra columns such as the datetime fields. Values are compared the
    way the csv module stores them (``None`` becomes ``""``, anything else its
    ``str()``), so an incident scraped with a missing attribute still matches
    the row that was read back from disk.

    Args:
        previous_incidents: iterable of dicts for rows already recorded.
        incidents: sequence of dicts for the incidents just scraped.

    Yields:
        dict: each element of ``incidents`` (the very same object) that has
        no equal counterpart in ``previous_incidents``.
    """
    if not incidents:
        # Nothing was scraped; bail out before indexing incidents[0] below.
        return

    def _fingerprint(item, keys):
        # Hashable, CSV-normalised view of the selected columns of one row.
        return frozenset(
            (key, "" if value is None else str(value))
            for key, value in item.items()
            if key in keys
        )

    compared_keys = incidents[0].keys()
    already_seen = {
        _fingerprint(item, compared_keys) for item in previous_incidents
    }
    for incident in incidents:
        if _fingerprint(incident, incident.keys()) not in already_seen:
            yield incident
def gen_datetime_obj(freeze=None):
    """Build the ``first_seen_*`` columns for one moment in time.

    Args:
        freeze: optional ``datetime.datetime`` to use instead of the current
            UTC time.

    Returns:
        dict with four entries:
        ``first_seen_at_timestamp`` (ISO-8601 string cut before any
        fractional seconds), ``first_seen_date`` (ISO date string),
        ``first_seen_weekday`` (ISO weekday, 1 = Monday .. 7 = Sunday) and
        ``first_seen_hour`` (integer hour).
    """
    if freeze:
        dt = freeze
    else:
        # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
        # "now" and drop tzinfo so the formatted output keeps the same naive
        # shape (no "+00:00" suffix).
        dt = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    day = dt.date()
    return {
        "first_seen_at_timestamp": dt.isoformat().split(".")[0],
        "first_seen_date": day.isoformat(),
        "first_seen_weekday": day.isoweekday(),
        "first_seen_hour": dt.hour,
    }
def write_data(previous_incidents, new_incidents, datetime_obj):
    """Rewrite today's CSV with the previous rows followed by the new ones.

    The target is ``data/<year>/<date>.csv`` for the date returned by
    ``today()``; missing parent directories are created. Each new incident is
    updated in place with ``datetime_obj`` before being written, and the
    number of new incidents is printed at the end.

    Args:
        previous_incidents: iterable of row dicts to write back unchanged.
        new_incidents: iterable of incident dicts to append; may be a
            one-shot generator.
        datetime_obj: dict of ``first_seen_*`` values merged into every new
            incident.
    """
    # Materialise both inputs before the file is opened in "w" mode: either
    # may be lazy, any error while producing them should surface before the
    # existing file is truncated, and the new-incident count is needed after
    # the loop (a consumed generator would always report 0).
    previous_incidents = list(previous_incidents)
    new_incidents = list(new_incidents)

    filename = Path(f"data/{today().year}/{str(today())}.csv")
    # parents=True so a fresh checkout without a data/ directory still works.
    filename.parent.mkdir(parents=True, exist_ok=True)

    # newline="" is what the csv module expects for files it writes.
    with open(filename, "w", newline="") as csvfile:
        writer = csv.DictWriter(
            csvfile,
            fieldnames=[
                "first_seen_at_timestamp",
                "first_seen_date",
                "first_seen_weekday",
                "first_seen_hour",
                "category",
                "location",
                "lat",
                "lon",
            ],
        )
        writer.writeheader()
        writer.writerows(previous_incidents)
        for incident in new_incidents:
            incident.update(datetime_obj)
            writer.writerow(incident)
    print(f"{len(new_incidents)} incidents added.")
def main():
    """Merge the incidents in ``incidents.html`` into today's CSV file.

    The timestamp columns are generated first, then the rows already stored
    for today are loaded. The incidents parsed from ``incidents.html`` are
    compared against the last 40 stored rows only, and whatever is new is
    handed to ``write_data`` together with all stored rows.
    """
    stamp = gen_datetime_obj()
    stored_rows = list(load_previous_data())
    with open("incidents.html") as page:
        scraped = list(parse(page.read()))
        fresh = merge(stored_rows[-40:], scraped)
    write_data(stored_rows, fresh, stamp)
# Call main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()