-
Notifications
You must be signed in to change notification settings - Fork 0
/
compiler.py
executable file
·105 lines (102 loc) · 3.91 KB
/
compiler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sys, json, os
from io import StringIO, BytesIO
"""
对file文件进行词法分析,将结果以格式化的形式写入
当前目录下的result.txt文件中.
"""
def _tokenize(source, code_file):
    """Turn *source* text into a list of formatted "(code, token)" lines.

    *code_file* maps token keys (reserved words, operators, "id", "num")
    to their codes; the codes are formatted with ``%d``.  A token key that
    is missing from the table raises ``KeyError``.  Scanning stops at the
    first unrecognised character after printing "unknown symbol <ch>";
    the lines collected up to that point are still returned.
    """
    whitespace = frozenset(' \n\t\r')
    digits = frozenset('0123456789')
    single_ops = frozenset(',;+-*=.')
    # Operators that may be followed by '=' to form a two-character token.
    maybe_eq_ops = frozenset('><:')

    lines = []

    def emit(key, token):
        lines.append("(%d, %s)\n" % (code_file[key], token))

    pos = 0
    end = len(source)
    while pos < end:
        ch = source[pos]
        if ch in whitespace:
            pos += 1
        elif ch.isalpha():
            # A letter followed by any run of letters and digits.
            start = pos
            pos += 1
            while pos < end and (source[pos].isalpha() or source[pos] in digits):
                pos += 1
            token = source[start:pos]
            # Words listed in the code table are reserved words; anything
            # else is a user-defined identifier.
            emit(token if token in code_file else "id", token)
        elif ch in digits:
            # TODO: handle negative numbers and decimals.
            start = pos
            pos += 1
            while pos < end and source[pos] in digits:
                pos += 1
            # The number is emitted as its decimal text.  An earlier note
            # here said that emitting a binary representation broke the
            # later syntax analysis for multi-digit values.
            emit("num", source[start:pos])
        elif ch in single_ops:
            emit(ch, ch)
            pos += 1
        elif ch in maybe_eq_ops:
            if source.startswith('=', pos + 1):
                token = ch + '='
                emit(token, token)
                pos += 2
            else:
                # Also taken when the operator is the last character of
                # the input, so the scan always advances.
                emit(ch, ch)
                pos += 1
        elif ch == '/':
            if source.startswith('/', pos + 1):
                # Line comment: skip through the end of the line.
                newline = source.find('\n', pos + 2)
                pos = end if newline == -1 else newline + 1
            elif source.startswith('*', pos + 1):
                # Block comment: skip through the closing "*/".  An
                # unterminated comment swallows the rest of the input.
                close = source.find('*/', pos + 2)
                pos = end if close == -1 else close + 2
            else:
                emit(ch, ch)
                pos += 1
        else:
            print("unknown symbol %s" % ch)
            break
    return lines


def scaner(file):
    """Run lexical analysis on *file* and write the tokens to ./result.txt.

    The token-code table is loaded from ``./complier.json`` in the current
    directory.  The source file is read as UTF-8.  ``./result.txt`` is
    created or overwritten with one "(code, token)" line per token.

    Args:
        file: Path of the source file to analyse.

    Raises:
        OSError: If the code table or the source file cannot be opened.
        UnicodeDecodeError: If the source file is not valid UTF-8.
        KeyError: If a token's key is missing from the code table.
    """
    # Load the token-code table as a dictionary.
    with open('./complier.json', 'r', encoding='utf-8') as f:
        code_file = json.load(f)
    # Decode the whole file at once so multi-byte characters stay intact.
    with open(file, 'rb') as f:
        source = f.read().decode('utf-8')
    # Mode 'w' creates the file if needed and discards any previous content;
    # the context manager closes it even if tokenizing raises.
    with open('./result.txt', 'w', encoding='utf-8') as result_file:
        result_file.write(''.join(_tokenize(source, code_file)))
def main():
    """Pick the file to analyse from the command line and scan it.

    With no command-line argument the path is read interactively; with
    exactly one non-empty argument that argument is used as the path.
    Any other argument combination prints an error message instead.
    """
    args = sys.argv
    argc = len(args)
    if argc == 1:
        # No path on the command line: ask the user for one.
        scaner(input("请输入待分析文件路径:"))
        return
    if args[1] and argc == 2:
        scaner(args[1])
        return
    print("文件路径错误.")
# Run the scanner only when this file is executed as a script, so that
# importing the module does not start a scan or prompt for input.
if __name__ == "__main__":
    main()