-
Notifications
You must be signed in to change notification settings - Fork 0
/
scanner.py
75 lines (66 loc) · 2.38 KB
/
scanner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import sys
from enum import Enum, auto
from tokentype import TokenType
# Scanner State
class State(Enum):
    """Kind of lexeme the scanner is currently accumulating in its buffer."""

    # Not inside a word or a number.
    NONE = 1
    # Buffer holds the digits of a number literal.
    DIGIT = 2
    # Buffer holds the letters of a word.
    ALPHA = 3
# Lexical Analyzer
class Scanner:
    """Lexical analyzer that splits source text into a flat list of tokens.

    The input is scanned once, character by character, when the instance is
    created. The result is stored in ``tokens`` as ``(TokenType, value)``
    tuples; ``value`` is an ``int`` for NUMBER tokens and a ``str`` for every
    other token type.
    """

    def __init__(self, code: str):
        """Store *code* and immediately scan it into ``self.tokens``."""
        self.code = code
        self.state: State = State.NONE
        # Characters of the word or number currently being accumulated.
        self.buffer: list[str] = []
        self.tokens: list[tuple[TokenType, str | int]] = []
        self.scan_tokens()

    def add_token(self, token_type: TokenType, value: "str | int"):
        """Append a ``(token_type, value)`` pair to the token list."""
        self.tokens.append((token_type, value))

    def scan_tokens(self):
        """Walk ``self.code`` and emit one token per lexeme.

        Runs of decimal digits become NUMBER tokens and runs of letters become
        word-like tokens (see ``consume_alpha``). A digit directly following
        letters, or a letter directly following digits, ends the pending
        lexeme and starts a new one. Space, newline and tab only separate
        lexemes.

        Exits the process via ``sys.exit`` with a "Scanner Error" message when
        a character is not a digit, a letter, a known symbol or one of those
        whitespace characters.
        """
        for c in self.code:
            # isdecimal() rather than isdigit(): isdigit() also accepts
            # characters such as superscript digits that int() cannot parse.
            if c.isdecimal():
                if self.state == State.ALPHA:
                    self.consume_alpha()
                self.buffer.append(c)
                self.state = State.DIGIT
            elif c.isalpha():
                if self.state == State.DIGIT:
                    self.consume_number()
                self.buffer.append(c)
                self.state = State.ALPHA
            else:
                # Any other character terminates the pending word/number.
                self._flush_buffer()
                if c in ('(', ')'):
                    self.add_token(TokenType.PARENTHESES, c)
                elif c in ('{', '}'):
                    self.add_token(TokenType.BRACKET, c)
                elif c in ('<', '+', '*', '='):
                    self.add_token(TokenType.OPERATOR, c)
                elif c == ';':
                    self.add_token(TokenType.SEMICOLON, c)
                elif c not in (' ', '\n', '\t'):
                    sys.exit(f"Scanner Error: Undefined Character {c.encode()}")
        # Input may end in the middle of a word or number; without this final
        # flush the last lexeme would never be turned into a token.
        self._flush_buffer()

    def _flush_buffer(self):
        """Emit the pending word or number, if any, and reset the state."""
        if self.state == State.ALPHA:
            self.consume_alpha()
        elif self.state == State.DIGIT:
            self.consume_number()
        self.state = State.NONE

    def consume_alpha(self):
        """Turn the buffered letters into a token and clear the buffer.

        ``int``/``char`` become TYPE tokens, ``IF``/``THEN``/``ELSE`` become
        STATEMENT tokens, ``EXIT`` becomes an EXIT token, and any other word
        becomes a WORD token. Matching is case-sensitive.
        """
        word = ''.join(self.buffer)
        if word in ('int', 'char'):
            self.add_token(TokenType.TYPE, word)
        elif word in ('IF', 'THEN', 'ELSE'):
            self.add_token(TokenType.STATEMENT, word)
        elif word == 'EXIT':
            self.add_token(TokenType.EXIT, word)
        else:
            self.add_token(TokenType.WORD, word)
        self.buffer.clear()

    def consume_number(self):
        """Turn the buffered digits into a NUMBER token and clear the buffer.

        The token value is stored as an ``int``, not as the original text.
        """
        num = int(''.join(self.buffer))
        self.add_token(TokenType.NUMBER, num)
        self.buffer.clear()

    def print_tokens(self):
        """Print every token as ``<TYPE_NAME, 'value'>`` under a header line."""
        print("==== GENERATED TOKENS ====")
        for token_type, value in self.tokens:
            print(f"<{token_type.name}, '{value}'>")
        print()