-
Notifications
You must be signed in to change notification settings - Fork 0
/
LexicalAnalysis.py
92 lines (82 loc) · 1.49 KB
/
LexicalAnalysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#--------------------------------------------------------
# LexicalAnalysis.py
#
# Lexical analysis
#--------------------------------------------------------
import ply.lex as lex
# Reserved words of the toy language, mapped to their token names.
# t_ID consults this table to re-type identifiers that are keywords.
reserved = {
    'int'   : 'INT',
    'if'    : 'IF',
    'while' : 'WHILE',
    'cout'  : 'COUT',
    'endl'  : 'ENDL',
}

# Complete list of token names (always required by PLY): the simple
# tokens first, followed by the reserved-word token names.
tokens = [
    'ID',
    'EQUAL',
    'NUMBER',
    'LPAREN',
    'RPAREN',
    'MINUS',
    'LESS',
    'STREAM',
    'STRING',
    'LKUAI',
    'RKUAI',
    'PLUS',
] + list(reserved.values())
# Regular expression rules for simple tokens.
# NOTE: reserved words ('int', 'if', ...) deliberately get no rules of
# their own -- they are matched by the t_ID function rule and re-typed
# via the reserved dict, as the PLY documentation recommends.  The old
# t_INT = r'int' string rule was dead code (t_ID always matched first)
# and has been removed.
t_EQUAL  = r'='
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_MINUS  = r'-'
t_LESS   = r'\<'
t_STREAM = r'\<\<'  # '<<'; PLY tries longer string rules before shorter ones
t_LKUAI  = r'\{'    # left curly brace  ("kuai" = brace)
t_RKUAI  = r'\}'    # right curly brace
t_PLUS   = r'\+'
# A regular expression rule with some action code
def t_NUMBER(tok):
    r'\d+'
    # Convert the matched digit string into a real integer before the
    # token is handed to the parser.
    tok.value = int(tok.value)
    return tok
def t_ID(tok):
    r'[a-zA-Z]\w*'
    # Identifiers and keywords share one rule: look the lexeme up in
    # the reserved-word table and re-type it when it is a keyword,
    # otherwise leave it as a plain ID.
    tok.type = reserved[tok.value] if tok.value in reserved else 'ID'
    return tok
def t_STRING(tok):
    r'\".*?\"'
    # Non-greedy match of a double-quoted literal; the surrounding
    # quotes are kept as part of the token value.
    return tok
# Define a rule so we can track line numbers
def t_newline(t):
    r'\n+'
    # Match a whole run of newlines in one go (the PLY-documented idiom;
    # the old r'\n' fired once per newline) and advance the line counter
    # by the number of newlines consumed.  No return value, so the
    # newlines are discarded rather than emitted as tokens.
    t.lexer.lineno += len(t.value)
# Characters skipped silently between tokens: space, ';' and tab.
# NOTE(review): treating ';' as ignorable means statement terminators
# never appear in the token stream -- presumably intentional for this
# toy lexer, but confirm if a parser later needs them.
t_ignore = ' ;\t'
# Error handling rule
def t_error(t):
    # Report the offending character, then skip past it so lexing can
    # resume.  print() with a single parenthesized argument is valid in
    # both Python 2 and 3; the original 'print ...' statement was
    # Python-2-only syntax.
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)
# Build the lexer from the rule definitions above
lexer = lex.lex()

# Sample C-like program used to exercise the lexer.
# NOTE(review): 'bs' in the while condition is probably a typo for 'bc',
# but it tokenizes as an ordinary ID either way, so it is left as-is.
data = '''
int asd = 0;
int bc = 10;
while ( asd < bs)
{
if(bc - asd < 2)
cout<<"they are close."<<endl;
asd = asd + 1;
}
'''

# Give the lexer some input
lexer.input(data)

# Pull tokens one at a time until the lexer is exhausted.
# print(tok) is valid in both Python 2 and 3; the original 'print tok'
# statement was Python-2-only syntax.
while True:
    tok = lexer.token()
    if not tok:
        break  # No more input
    print(tok)