-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrape.py
110 lines (77 loc) · 3.12 KB
/
scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import requests
import session
import re
import statistics
import pickle
from bs4 import BeautifulSoup
# Restore the saved authentication arguments and log in once, at import time.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so
# sessionAuth.pkl must only ever come from a trusted source.
with open('sessionAuth.pkl', 'rb') as authFile:
    authStrings = pickle.load(authFile)

# Module-wide session object; getPaymentHistory sends its request through it.
curSession = session.Session()
curSession.authenticate(*authStrings)
# Score awarded for each payment status read from the loan performance table.
# Statuses that are not listed here (including "Scheduled") do not contribute
# to the final score.
_PAYMENT_STATUS_SCORES = {
    'Completed': 5.0,
    'Completed - In Grace Period': 3.0,
    'Completed - Late (16-30 days)': 2.0,
    'Completed - Late (31-120 days)': 1.0,
    'Not Received': 0.0,
    'Partial Payment - Late (31-120 days)': 0.5,
}


def _extractPaymentScores(soup):
    """Collect the score of every recognised payment row in a parsed page.

    Looks up the first element with id "lcLoanPerfTable1" and reads the
    status text from the second-to-last <td> of each of its rows.

    Args:
        soup: A BeautifulSoup document.

    Returns:
        A list of float scores in row order.  The list is empty when the
        table is missing or when no row carries a recognised status.
    """
    tables = soup.find_all(id="lcLoanPerfTable1")
    if not tables:
        return []

    scores = []
    for row in tables[0].find_all('tr'):
        cells = row.find_all('td')
        # The status lives in the second-to-last cell, so a row with fewer
        # than two <td> cells has nothing to score.
        if len(cells) < 2:
            continue
        status = cells[-2].string
        # .string is None when the cell does not hold exactly one text node.
        if status is None:
            continue
        # Unknown statuses, and "Scheduled" payments, are deliberately
        # left out of the score.
        score = _PAYMENT_STATUS_SCORES.get(status.strip())
        if score is not None:
            scores.append(score)
    return scores


def getPaymentHistory(loanid, orderid, noteid, savePage = False):
    """Fetch a note's loan performance page and score its payment history.

    Args:
        loanid: Value sent as the 'loan_id' query parameter.
        orderid: Order identifier (see the review note on the query below).
        noteid: Note identifier (see the review note on the query below).
        savePage: When true, the raw response text is also written to
            "debug.txt" so it can be replayed with testFromFile.

    Returns:
        The mean of the scores of all recognised payment rows, or None when
        the payment table is missing or no row could be scored.
    """
    # NOTE(review): 'order_id' is filled from noteid and 'note_id' from
    # orderid.  This crossed mapping is preserved from the existing code;
    # confirm whether it is intentional or the two values should be swapped.
    params = {'showfoliofn':'true', 'loan_id':loanid, 'order_id':noteid, 'note_id':orderid}
    r = curSession.request('POST','foliofn/browseNotesLoanPerf.action', query=params)

    if savePage:
        with open("debug.txt", "w") as f:
            f.write(r.text)

    scores = _extractPaymentScores(BeautifulSoup(r.text, "html.parser"))
    if not scores:
        return None
    return statistics.mean(scores)


def testFromFile(filename):
    """Score a previously saved loan performance page, printing each score.

    Args:
        filename: Path of a file holding the page HTML, for example the
            "debug.txt" written by getPaymentHistory with savePage=True.

    Returns:
        The mean of the scores of all recognised payment rows, or None when
        the payment table is missing or no row could be scored.
    """
    with open(filename, "r") as f:
        fileText = f.read()

    # Use the same parser as getPaymentHistory so both paths see the same tree.
    scores = _extractPaymentScores(BeautifulSoup(fileText, "html.parser"))
    if not scores:
        return None

    for score in scores:
        print(score)
    return statistics.mean(scores)
if __name__ == "__main__":
    # Manual smoke test: fetch one note, save the page to debug.txt and
    # print its score.  The tuples below are presumably other argument
    # triples that were tried earlier - verify before reusing them.
    #(7955128, 32984932, 13002276)
    #(42303298, 73387695, 58192212)
    #(42394992, 73872012, 58653233)
    #(49188567, 81690314, 108565042)
    score = getPaymentHistory(60095699, 94968643, 83410761, True)
    print(score)
    #print(testFromFile("debug.txt"))