-
Notifications
You must be signed in to change notification settings - Fork 0
/
fivefict.py
156 lines (127 loc) · 3.74 KB
/
fivefict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# Module to get the last 5 matches and the next 5 matches of a team
import ply.lex as lex
import ply.yacc as yacc
# Token names made available to the lexer and the grammar rules below.
tokens = [
    # headings that delimit the sections of interest
    'BEGINPREV',
    'BEGINNEXT',
    # HTML tags the grammar refers to
    'OPENHREF',
    'CLOSEHREF',
    'ABBR',
    'CLOSEABBR',
    'OPENDIV',
    'CLOSEDIV',
    'OPENTABLE',
    'CLOSETABLE',
    # text, leftover tags, and the heading that ends the fixtures area
    'CONTENT',
    'GARBAGE',
    'STAFF',
]
# Lexer rule: the `mw-headline` span whose id and text are "2022".
# In p_init this token opens the section whose fixtures go to `prevlist`.
# The pattern is a raw string so that `\s` reaches the regex engine without
# Python flagging it as an invalid string escape.
def t_BEGINPREV(t):
    r'''<span\sclass="mw-headline"\sid="2022">2022</span>'''
    return t
# Lexer rule: the `mw-headline` span whose id and text are "2023".
# In p_init this token opens the section whose fixtures go to `futlist`.
# The pattern is a raw string so that `\s` reaches the regex engine without
# Python flagging it as an invalid string escape.
def t_BEGINNEXT(t):
    r'''<span\sclass="mw-headline"\sid="2023">2023</span>'''
    return t
# Lexer rule: the "Coaching staff" `mw-headline` span.
# Every alternative of p_init ends with this token, so it marks the end of
# the region the grammar tries to match.
# The pattern is a raw string so that `\s` reaches the regex engine without
# Python flagging it as an invalid string escape.
def t_STAFF(t):
    r'''<span\sclass="mw-headline"\sid="Coaching_staff">Coaching\sstaff</span>'''
    return t
# Lexer rule: an opening <table ...> tag, matched non-greedily up to the
# first '>' on the same line. Emitted as an OPENTABLE token.
def t_OPENTABLE(t):
    r'<table.*?>'
    return t
# Lexer rule: the literal closing tag </table>. Emitted as CLOSETABLE.
def t_CLOSETABLE(t):
    r'</table>'
    return t
# Lexer rule: an opening <div ...> tag, matched non-greedily up to the
# first '>' on the same line. Emitted as an OPENDIV token.
def t_OPENDIV(t):
    r'<div.*?>'
    return t
# Lexer rule: a closing </div> tag (anything between "</div" and the next
# '>' is tolerated). Emitted as a CLOSEDIV token.
def t_CLOSEDIV(t):
    r'</div.*?>'
    return t
# Lexer rule: an opening <abbr ...> tag.
# There is no return statement, so the rule yields None; PLY drops the match
# in that case and no ABBR token reaches the parser.
def t_ABBR(t):
    r'<abbr.*?>'
# Lexer rule: the literal closing tag </abbr>.
# There is no return statement, so the rule yields None; PLY drops the match
# in that case and no CLOSEABBR token reaches the parser.
def t_CLOSEABBR(t):
    r'</abbr>'
# Lexer rule: an opening anchor tag, emitted as an OPENHREF token.
# NOTE(review): the pattern only requires the tag to start with "<a", so any
# other tag beginning with those characters that was not claimed by an
# earlier rule would also be reported as OPENHREF - confirm this is intended.
def t_OPENHREF(t):
    r'<a.*?>'
    return t
# Lexer rule: the literal closing tag </a>. Emitted as CLOSEHREF.
def t_CLOSEHREF(t):
    r'</a>'
    return t
# Lexer rule: a run of text made of ASCII letters, digits, the listed
# accented letters, '.', ':' and spaces. Emitted as a CONTENT token.
# NOTE(review): the character class includes the space and this rule is
# defined before t_WHITESPACE, so runs of spaces appear to be consumed here
# as CONTENT first - confirm against PLY's rule ordering.
def t_CONTENT(t):
    '''[A-Za-z0-9ñáćéíóúü.: ]+'''
    return t
# Lexer rule: a run of space characters.
# Nothing is returned, so the matched spaces produce no token.
def t_WHITESPACE(t):
    '''[ ]+'''
# Lexer rule: any other <...> tag on a single line.
# Nothing is returned, so these tags are dropped and no GARBAGE token is
# ever handed to the parser.
def t_GARBAGE(t):
    r'<.*?>'
# Lexer rule: one or more newlines. Produces no token; it only keeps the
# lexer's line counter in step with the input.
def t_newline(t):
    r'\n+'
    # The match consists solely of newline characters, so counting them gives
    # the number of lines consumed.
    consumed_lines = t.value.count('\n')
    t.lexer.lineno = t.lexer.lineno + consumed_lines
# Lexer error handler: called for input no rule matches.
# Skips a single character so scanning can continue instead of stopping.
def t_error(t):
    t.lexer.skip(1)
# Module-level result lists: p_anchors appends to `prevlist`, p_futanchors
# appends to `futlist`, and getfivefixtures resets and trims both.
prevlist, futlist = [], []
# grammar to find fixtures section
def p_init(p):
    '''init : before BEGINPREV divs BEGINNEXT futdivs STAFF
            | before BEGINPREV OPENHREF CONTENT CLOSEHREF divs BEGINNEXT OPENHREF CONTENT CLOSEHREF futdivs STAFF
            | before BEGINPREV divs STAFF
            | before BEGINPREV OPENHREF CONTENT CLOSEHREF divs STAFF'''
    # Top-level rule (first grammar rule in the file; PLY uses the first rule
    # as the start symbol unless told otherwise).
    # Shape of every alternative: skipped leading tokens (`before`), the
    # BEGINPREV heading, the `divs` fixture blocks, and finally STAFF.
    # Alternatives 1 and 2 additionally expect BEGINNEXT followed by
    # `futdivs` before STAFF; alternatives 3 and 4 have no BEGINNEXT section.
    # Alternatives 2 and 4 allow one link (OPENHREF CONTENT CLOSEHREF) right
    # after a heading token.
    # No action is taken here; the lists are filled by p_anchors and
    # p_futanchors.
# grammar to skip unwanted content
def p_before(p):
    '''before : CONTENT before
              | OPENHREF before
              | CLOSEHREF before
              | OPENDIV before
              | CLOSEDIV before
              | OPENTABLE before
              | CLOSETABLE before
              | '''
    # Right-recursive filler: accepts any sequence (possibly empty) of text,
    # link, div and table tokens. p_init uses it for everything that precedes
    # BEGINPREV. No semantic action.
# grammar to find fixture table
def p_divs(p):
    '''divs : OPENDIV anchors OPENTABLE skip CLOSETABLE CLOSEDIV divs
            | '''
    # Zero or more fixture blocks. Each block is a div containing the
    # `anchors` header (which records the fixture in prevlist) followed by a
    # table whose contents are consumed by `skip`. No action of its own.
# grammar to extract each fixture from table
def p_anchors(p):
    '''anchors : OPENHREF CONTENT CLOSEHREF CONTENT CONTENT CONTENT OPENHREF CONTENT CLOSEHREF'''
    # Records one fixture from the BEGINPREV section as "<first> vs <second>",
    # where the two names are the CONTENT values inside the first link (p[2])
    # and the second link (p[8]).
    if len(p) <= 1:
        return
    fixture = " vs ".join((p[2], p[8]))
    prevlist.append(fixture)
# grammar to extract next 5 fixtures section
def p_futdivs(p):
    '''futdivs : OPENDIV futanchors OPENTABLE skip CLOSETABLE CLOSEDIV futdivs
               | '''
    # Zero or more fixture blocks after BEGINNEXT. Same shape as `divs`, but
    # the header is `futanchors`, which records the fixture in futlist.
    # No action of its own.
# grammar to extract each fixture from table of future
def p_futanchors(p):
    '''futanchors : OPENHREF CONTENT CLOSEHREF CONTENT CONTENT CONTENT OPENHREF CONTENT CLOSEHREF'''
    # Records one fixture from the BEGINNEXT section as "<first> vs <second>",
    # where the two names are the CONTENT values inside the first link (p[2])
    # and the second link (p[8]).
    if len(p) <= 1:
        return
    fixture = " vs ".join((p[2], p[8]))
    futlist.append(fixture)
def p_skip(p):
    '''skip : CONTENT skip
            | OPENHREF skip
            | CLOSEHREF skip
            | OPENDIV skip
            | CLOSEDIV skip
            | '''
    # Right-recursive filler for the inside of a fixture table: accepts any
    # sequence (possibly empty) of text, link and div tokens. Unlike `before`
    # it does not accept table tokens, so it stops at CLOSETABLE.
    # No semantic action.
# Parser error handler: deliberately does nothing, so syntax errors are not
# reported to the user.
# NOTE(review): with a silent handler a page that does not fit the grammar
# simply yields whatever fixtures were collected (possibly none) - confirm
# that this best-effort behaviour is what callers expect.
def p_error(p):
    pass
def getfivefixtures(filename):
    """Parse ``<filename>.html`` and report the last five and next five fixtures.

    The page is tokenized and parsed with the PLY rules defined in this
    module; the grammar actions fill the module-level ``prevlist`` and
    ``futlist``. Afterwards both lists are trimmed to at most five entries
    (the last five of ``prevlist``, the first five of ``futlist``), appended
    to ``programlogs.txt`` and printed to standard output.

    Args:
        filename: Path of the HTML file without its ``.html`` suffix.

    Returns:
        None. Results are left in the module globals ``prevlist`` and
        ``futlist``.

    Raises:
        OSError: If the HTML file or the log file cannot be opened.
    """
    global prevlist
    global futlist
    # Start from fresh lists: the grammar actions append to these globals
    # while the parser runs, so results of an earlier call must not leak in.
    prevlist = []
    futlist = []

    # The context manager closes the file even if reading fails.
    with open(filename + ".html", 'r', encoding='utf-8') as source:
        data = source.read()

    # Called without arguments, so PLY collects the t_* / p_* rules from the
    # calling module.
    lexer = lex.lex()
    parser = yacc.yacc()
    # parse() feeds `data` to the lexer itself; handing over the lexer
    # explicitly avoids relying on PLY's "most recently built lexer" default.
    parser.parse(data, lexer=lexer)

    # Slicing is a no-op on lists that already have five entries or fewer.
    prevlist = prevlist[-5:]
    futlist = futlist[:5]

    # logging the results
    with open('programlogs.txt', 'a', encoding='utf-8') as log:
        log.write("Five fixtures:\t")
        log.write(str(prevlist))
        log.write(str(futlist))

    print("================Last 5 matches================")
    for fixture in prevlist:
        print(fixture)
    print("================Next 5 matches================")
    for fixture in futlist:
        print(fixture)
    print("==============================================")
# getfivefixtures('United_States_men%27s_national_soccer_team')