-
Notifications
You must be signed in to change notification settings - Fork 3
/
appending_profil_caleg_dpr_prov.py
116 lines (88 loc) · 3.66 KB
/
appending_profil_caleg_dpr_prov.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import os
import urllib.request
import ssl
import csv
import pandas as pd
from bs4 import BeautifulSoup
from util_logging import *
from urllib.error import URLError, HTTPError
def getData(indexOf, totalData, idCaleg, idPartai, idDapil, idPro):
    """Fetch one candidate profile page and write it as a row of the profile CSV.

    The page at ``.../pencalonan/calon/<idCaleg>`` is downloaded and parsed, and
    the text of the ``col-sm-9`` cell of each of the first 18 matching "row"
    divs becomes the row's data columns, prefixed with the four ids passed in.

    Args:
        indexOf: 1-based position of this candidate, as a string. The value
            ``'1'`` makes the call (re)create ``data_profil_caleg_prov.csv``
            and write a header row first; any other value appends.
        totalData: Total number of candidates; used only in log messages.
        idCaleg: Candidate id, interpolated into the request URL.
        idPartai: Written to the row as-is.
        idDapil: Written to the row as-is.
        idPro: Written to the row as-is.

    Returns:
        True when the row was written, False when any exception occurred.
        Exceptions are not propagated; the outcome message is printed and
        passed to ``pecker`` (from ``util_logging``; presumably a log writer
        -- confirm there).
    """
    is_success = False
    message_string = "getting data id={} ({}/{})".format(idCaleg, indexOf, totalData)
    try:
        # NOTE(review): certificate verification is deliberately disabled here
        # via a private ssl helper; this allows man-in-the-middle tampering.
        # Kept as in the original -- confirm it is still required.
        context = ssl._create_unverified_context()
        url = "https://infopemilu.kpu.go.id/pileg2019/pencalonan/calon/{}".format(idCaleg)
        print("requesting {}".format(url))
        # Close the HTTP response deterministically instead of leaking it.
        with urllib.request.urlopen(url, context=context) as url_link:
            data = url_link.read().decode("utf8")

        soup = BeautifulSoup(data, "html.parser")
        listDataRaw = soup.find_all("div", class_=["row no-margin", "row no-margin yellow-one"])
        # Only the first 18 matching rows are used for the profile.
        listData = listDataRaw[:18]

        content = [idCaleg, idPartai, idDapil, idPro]
        content.extend(item.find("div", {'class': 'col-sm-9'}).text for item in listData)

        # Build the header before touching the file, so a parse failure on the
        # first record does not leave a truncated CSV behind.
        is_first = indexOf == '1'
        headers = None
        if is_first:
            headers = ['id', 'idPartai', 'idDapil', 'idPro']
            headers.extend(header.find("div", {'class': 'col-sm-3'}).text for header in listData)

        # One encoding for both the initial write and every append: previously
        # the first rows used the platform default while appended rows were
        # UTF-16, which produced a mixed-encoding file. newline='' is what the
        # csv module requires to avoid doubled line endings on Windows.
        mode = 'w' if is_first else 'a'
        with open('data_profil_caleg_prov.csv', mode, newline='', encoding='utf-16') as f:
            writer = csv.writer(f)
            if is_first:
                writer.writerow(headers)
            writer.writerow(content)

        is_success = True
        message_string = "–––––– ({}/{}) idCaleg = {} generated ––––––".format(indexOf, totalData, idCaleg)
    except HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be handled first.
        message_string = "ERROR !!! failed getting idCaleg = {} ({}/{}) - HTTPError cause : {}".format(idCaleg, indexOf, totalData, e)
    except URLError as e:
        message_string = "ERROR !!! failed getting idCaleg = {} ({}/{}) - URLError cause : {}".format(idCaleg, indexOf, totalData, e)
    except Exception as e:
        # Best-effort boundary: one bad record must not abort the whole batch.
        message_string = "ERROR !!! failed getting idCaleg = {} ({}/{}) - cause : {}".format(idCaleg, indexOf, totalData, e)

    print(message_string)
    pecker("", message_string)
    return is_success
# Batch driver: read every candidate listed in data_caleg_prov.csv and call
# getData once per row, tallying successes and failures.
try:
    data_generated = 0
    data_error = 0
    total_data_generated = 0

    csv_file = 'data_caleg_prov.csv'
    df = pd.read_csv(csv_file)
    # Columns are pulled out as plain Python lists and indexed in lockstep.
    id_list = df['id'].tolist()
    idPartai_list = df['idPartai'].tolist()
    idDapil_list = df['idDapil'].tolist()
    idPro_list = df['idPro'].tolist()

    message_string = "=== START GENERATING PROFIL CALEG DPRD PROV ==="
    print(message_string)
    pecker("", message_string)

    # The total only feeds progress messages; compute its string form once.
    total_label = str(len(id_list))
    for i, idCaleg in enumerate(id_list):
        succeeded = getData(
            str(i + 1),
            total_label,
            str(idCaleg),
            idPartai_list[i],
            idDapil_list[i],
            idPro_list[i],
        )
        if not succeeded:
            data_error += 1
        else:
            data_generated += 1

    total_data_generated = data_generated + data_error
    message_string = "=== END GENERATING PROFIL CALEG DPRD PROV:: generated={}, error={}, total={} data generated ===".format(data_generated, data_error, total_data_generated)
except Exception as e:
    # Anything that escapes (e.g. unreadable CSV, missing column) replaces the
    # summary with a single failure message.
    message_string = "ERROR !!! Failed generating profil caleg dprd prov –– {}".format(str(e))

# Emit whichever message was set last: the END summary or the failure.
print(message_string)
pecker("", message_string)
TYPE_APPEND = input("1 - Single id\n 2 - From indexOf\nEnter appending type : ")
if (TYPE_APPEND == '1') :
total_data_generated = 0
data_generated = 0
data_error = 0
print("APPENDING SINGLE ID")
ID_CALEG = input("Enter idCaleg")
message_string = "===== START :: APPENDING PROFIL CALEG DPRD PROV ––BY SINGLE ID idCaleg={}=====".format(ID_CALEG)
print(message_string)
pecker(LOG_IMAGE_CALEG, message_string)
if (getData('1', '1', 'SINGLE', ''))