-
Notifications
You must be signed in to change notification settings - Fork 0
/
c-interpreter.c
1535 lines (1330 loc) · 47.4 KB
/
c-interpreter.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <string.h>
#define int long long // work with 64bit target: from here on every `int` in this file is a 64-bit integer
//#define int intptr_t
int token; // current token
int token_val; // value of current token (mainly for number)
char *src, *old_src; // pointer to source code string;
int poolsize; // default size of text/data/stack
int line; // line number
int *text, // text segment
*old_text, // for dump text segment
*stack; // stack
char *data; // data segment
int *current_id, // current parsed ID
*symbols; // symbol table; can be thought of as an array of structs, each IdSize cells long
int *idmain; // the `main` function
int *pc, *bp, *sp, ax, cycle; // virtual machine registers
// instructions
enum { LEA ,IMM ,JMP ,CALL,JZ ,JNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PUSH,
OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,
OPEN, READ, CLOS, PRTF, MALC, MSET, MCMP, EXIT };
/* tokens and classes (operators last and in precedence order)
These are the categories a token is assigned once it has been scanned;
identifiers of the same category share the same attributes.
Numbering starts at 128 so that these values stay clear of the plain
character codes that next() also returns as tokens (e.g. ';' or '{'). */
// Note: the mixed-case Char and Int here are token / class kinds, i.e. values used for `token` and for an identifier's class.
enum {
Num = 128, Fun, Sys, Glo, Loc, Id, //class
Char, Else, Enum, If, Int, Return, Sizeof, While, // type
Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak
};/* The last row lists the operators in increasing order of precedence. Callers
start with expression(Assign) because Assign has the lowest precedence; the
precedence-climbing algorithm then resolves the expression step by step. */
// fields of identifier
enum {Token, Hash, Name, Type, Class, Value, GType, GClass, GValue, IdSize};
// basetypes of variable/function (this program supports only 3 base types)
// Note: the upper-case CHAR and INT here are the types of variables or of function return values, i.e. values used for a `type` field.
enum { CHAR, INT, PTR };
int basetype; // the type of a declaration, make it global for convenience
int expr_type; // the type of an expression
// function frame
//
// 0: arg 1
// 1: arg 2
// 2: arg 3
// 3: return address
// 4: old bp pointer <- index_of_bp
// 5: local var 1
// 6: local var 2
int index_of_bp; // index of bp pointer on stack
/* 词法分析
1. for lexical analysis;
2. get the next token;
3. will ignore spaces tabs etc.*/
void next() {
char *current_ch;
int hash;//用于计算哈希值的临时变量
/*parse token 不断读取字符
(事实上正儿八经的词法分析都在if 和 else if里,这里的src++用于跳过空格和不需要处理的字符)
因此事实上token用于是指向每一个字符的开头,而src则真正用于判断下一个字符*/
while (token = *src++)
{
if (token == '\n') ++line; //处理换行符,错误处理时打印行号
else if (token == '#')
{
// skip macro, because we will not support it
while (*src != 0 && *src != '\n') {src++;}
}
else if ((token >= 'a' && token <= 'z') || (token >= 'A' && token <= 'Z') || (token == '_'))
{ // parse identifier,处理内置的类型名,或者用户自定义的变量名(统称为 ID)
//这个其实是取到当前外层while循环中正在执行的字符地址,因为src已经被++了
current_ch = src - 1;
/* 1. 逐字母计算该符号(ID)的一整个哈希值 */
hash = token;
while ((*src >= 'a' && *src <= 'z') || (*src >= 'A' && *src <= 'Z') || (*src >= '0' && *src <= '9') || (*src == '_'))
{
hash = hash * 147 + *src++;
}
/* 2. 每次都用一个指针指向symbol table[] 的首地址,遍历符号表(用current_id作为迭代器) */
current_id = symbols;
while (current_id[Token]) //检索symbol_toble 的token属性 (注意这里current_id[Token]应该理解成current_id.token属性)
{
//如果当前identity已经在哈希表中, 并且名字相等(memcmp返回0,表示相等)
//注意memcmp的第三个参数 “ src - current_ch ” 两个地址相减,其实返回的是一个整数,表示地址相差多少个sizeof(char*)
if (current_id[Hash] == hash && !memcmp((char *)current_id[Name], current_ch, src - current_ch))
{ //找到对应的结构体, 并且 提前return
// if(current_id[Token] == Id) {printf("same id , line = %lld\n", line);}
token = current_id[Token];
return; //
}
//如果这个token不在哈希表,将current_id指针+9,即指向下一个symbol table结构体的首地址
current_id = current_id + IdSize;
}
/* 3. 如果最后遍历完整个symbol_table结构体数组,还是没有找到,就 create and store this new ID */
current_id[Name] = (int)current_ch; //current_ch是个地址, 若有需要可以通过Name到./text段找到该token的完整名称
current_id[Hash] = hash; //自定义类型变量名,存到symbol table中是它的 hash值 和名字
current_id[Token] = Id; // 若Token == Id ,则为用户自定义类型
token = current_id[Token];
return;
}
else if (token >= '0' && token <= '9')
{
// parse number, three kinds: dec(123) hex(0x123) oct(017)
token_val = token - '0';
if (token_val > 0)
{
//处理十进制数据
// dec, starts with [1-9]
while (*src >= '0' && *src <= '9')
{
token_val = token_val*10 + *src++ - '0';
}
}
else
{
// starts with 0
if (*src == 'x' || *src == 'X')
{
//处理十六进制数据
token = *++src;
while ((token >= '0' && token <= '9') || (token >= 'a' && token <= 'f') || (token >= 'A' && token <= 'F'))
{
//这里对十六进制的处理很妙 ('A' = 01000001 || 'a' = (01100001) & 15 = 00001111)
token_val = token_val * 16 + (token & 15) + (token >= 'A' ? 9 : 0);
token = *++src;
}
}
else
{
// 处理八进制数据
while (*src >= '0' && *src <= '7')
{
token_val = token_val*8 + *src++ - '0';
}
}
}
token = Num;
return;
}
else if (token == '\'' || token == '"' )
{ //处理字符或者字符串
// parse string literal, currently, the only supported escape
// character is '\n', store the string literal into data.
current_ch = data;
//如果是没有遇到是空字符或者没有遇到字符串结尾 "(处理字符串)
while (*src != 0 && *src != token)
{
token_val = *src++;
//如果遇到转义字符'\'
if (token_val == '\\')
{
token_val = *src++;// 跳过当前'\'
if (token_val == 'n') token_val = '\n'; //本程序支持特殊字符'\n' 的处理
}
//如果是字符串,在遇到字符串结尾 " 之前,将字符串字面值一个个存入data区
if (token == '"')
*data++ = token_val;
}
src++;
if (token == '"') //如果是字符串,将字符串首地址存到data区
token_val = (int)current_ch;
else // 如果是单个字符,直接存字符本身,属于是ascall码
token = Num;
return;
}//处理注释
else if (token == '/')
{
if (*src == '/')
{
// skip comments
while (*src != 0 && *src != '\n') {++src;}
} else
{
// divide operator
token = Div;
return;
}
}//处理其他字符
else if (token == '=')
{
// parse '==' or '='
if (*src == '=')
{
src ++;
token = Eq;
} else
{
token = Assign;
}
return;
}
else if (token == '+')
{
// parse '++' or '+'
if (*src == '+')
{
src ++;
token = Inc;
} else
{
token = Add;
}
return;
}
else if (token == '-')
{
// parse '--' or '-'
if (*src == '-')
{
src ++;
token = Dec;
} else
{
token = Sub;
}
return;
}
else if (token == '!')
{
// parse '!='
if (*src == '=')
{
src++;
token = Ne;
}
return;
}
else if (token == '<')
{
// parse '<=', '<<' or '<'
if (*src == '=') {
src ++;
token = Le; //less equal
} else if (*src == '<') {
src ++;
token = Shl; //shift left
} else {
token = Lt; //less than
}
return;
}
else if (token == '>')
{
// parse '>=', '>>' or '>'
if (*src == '=') {
src ++;
token = Ge;
} else if (*src == '>') {
src ++;
token = Shr;
} else {
token = Gt;
}
return;
}
else if (token == '|')
{
// parse '|' or '||'
if (*src == '|') {
src ++;
token = Lor; //logical or
} else {
token = Or; //bit or
}
return;
}
else if (token == '&')
{
// parse '&' and '&&'
if (*src == '&') {
src ++;
token = Lan; //logical and
} else {
token = And; //bit and
}
return;
}
else if (token == '^')
{
token = Xor;
return;
}
else if (token == '%')
{
token = Mod;
return;
}
else if (token == '*')
{
token = Mul;
return;
}
else if (token == '[')
{
token = Brak;
return;
}
else if (token == '?')
{
token = Cond;
return;
}
else if (token == '~' || token == ';' || token == '{' || token == '}' || token == '(' || token == ')' || token == ']' || token == ',' || token == ':')
{
// directly return the character as token;
return;
}
}
return;
}
/*
 * Require the current token to be `tk`.
 * On a match the lexer is advanced to the following token; otherwise the
 * current line number and the expected token code are printed and the
 * process exits with status -1.
 */
void match(int tk) {
    if (token != tk) {
        printf("%lld: expected token: %lld\n", line, tk);
        exit(-1);
    }
    next();
}
/*
 * Syntax analysis for expressions, using precedence climbing.
 *
 * Parses one expression starting at the current token and appends the VM
 * code for it to `text`. On return `expr_type` holds the type of the
 * expression (CHAR, INT, or one of these plus a multiple of PTR).
 *
 * level: lowest operator token this call may consume. The operator tokens
 *        are declared in increasing precedence order, so the binary loop
 *        runs while `token >= level`. Statement-level callers pass Assign.
 *
 * An expression is handled in two parts:
 *   1. unit_unary ::= unit | unit unary_op | unary_op unit
 *   2. expr       ::= unit_unary (bin_op unit_unary ...)
 * e.g. in `(char) *a[10] = (int *) func(b > 0 ? 10 : 20);`
 * `a[10]` is a unit, `*` is an operator and `func(...)` as a whole is a unit.
 * Units and prefix operators are parsed first, then binary and postfix
 * operators.
 *
 * Any syntax or type error prints a message prefixed with the line number
 * and terminates the process with exit(-1).
 */
void expression(int level)
{
    int *id;    // symbol-table entry of the identifier being compiled
    int tmp;    // scratch: argument count, cast type, saved operator or saved left-hand type
    int *addr;  // operand slot of a jump whose target is patched later

    // ---- part 1: units and prefix operators ----
    {
        if (!token)
        {
            printf("%lld: unexpected token EOF of expression\n", line);
            exit(-1);
        }

        if (token == Num)
        {
            // immediate value; emit before match() advances the lexer,
            // because next() may overwrite token_val
            *++text = IMM;
            *++text = token_val;
            match(Num);
            expr_type = INT;
        }
        else if (token == '"')
        {
            // string literal: its value is the address next() left in token_val
            *++text = IMM;
            *++text = token_val;
            match('"');
            /* Adjacent literals such as
                   p = "first line"
                       "second line";
               are accepted: next() appended each one to the data segment
               directly after the previous one, so they only need skipping. */
            while (token == '"') {
                match('"');
            }
            // Move `data` up to the next sizeof(int) boundary, always by at
            // least one byte; the skipped byte(s) act as the terminating
            // '\0' (this relies on the data segment being zero-filled, which
            // is set up outside this function).
            data = (char *)(((int)data + sizeof(int)) & (-sizeof(int)));
            expr_type = PTR;
        }
        else if (token == Sizeof)
        {
            // sizeof is actually an unary operator
            // now only `sizeof(int)`, `sizeof(char)` and `sizeof(*...)` are
            // supported.
            match(Sizeof);
            match('(');
            expr_type = INT;
            if (token == Int) {
                match(Int);
            } else if (token == Char) {
                match(Char);
                expr_type = CHAR;
            }
            // each '*' adds one level of pointer
            while (token == Mul) {
                match(Mul);
                expr_type = expr_type + PTR;
            }
            match(')');
            // emit code
            *++text = IMM;
            *++text = (expr_type == CHAR) ? sizeof(char) : sizeof(int);
            expr_type = INT;
        }
        else if (token == Id)
        {
            // there are several type when occurs to Id
            // but this is unit, so it can only be
            // 1. function call
            // 2. Enum variable
            // 3. global/local variable
            match(Id);
            id = current_id;

            if (token == '(')
            {
                // function call: an identifier directly followed by '('
                match('(');

                // pass in arguments: evaluate and push each, left to right
                tmp = 0; // number of arguments
                while (token != ')')
                {
                    expression(Assign);
                    *++text = PUSH;
                    tmp++;
                    if (token == ',') {
                        match(',');
                    }
                }
                match(')');

                // emit code
                if (id[Class] == Sys) {
                    // system functions: Value is emitted as the instruction itself
                    *++text = id[Value];
                }
                else if (id[Class] == Fun) {
                    // function call
                    *++text = CALL;
                    *++text = id[Value];
                }
                else {
                    printf("%lld: bad function call\n", line);
                    exit(-1);
                }

                // clean the stack for arguments
                if (tmp > 0) {
                    *++text = ADJ;
                    *++text = tmp;
                }
                expr_type = id[Type];
            }
            else if (id[Class] == Num)
            {
                // enum variable
                *++text = IMM;
                *++text = id[Value];
                expr_type = INT;
            }
            else
            {
                // variable: first compute its address ...
                if (id[Class] == Loc) {
                    *++text = LEA;
                    *++text = index_of_bp - id[Value];
                }
                else if (id[Class] == Glo) {
                    *++text = IMM;
                    *++text = id[Value];
                }
                else {
                    printf("%lld: undefined variable\n", line);
                    exit(-1);
                }
                // ... then, by default, load the value of the
                // address which is stored in `ax`
                expr_type = id[Type];
                *++text = (expr_type == CHAR) ? LC : LI;
            }
        }
        else if (token == '(')
        {
            // cast or parenthesis
            match('(');
            if (token == Int || token == Char) {
                tmp = (token == Char) ? CHAR : INT; // cast type
                match(token);
                while (token == Mul) {
                    match(Mul);
                    tmp = tmp + PTR;
                }
                match(')');
                expression(Inc); // cast has precedence as Inc(++)
                expr_type = tmp;
            } else {
                // normal parenthesis
                expression(Assign);
                match(')');
            }
        }
        else if (token == Mul)
        {
            // dereference *<addr>
            match(Mul);
            expression(Inc); // dereference has the same precedence as Inc(++)
            if (expr_type >= PTR) {
                expr_type = expr_type - PTR;
            } else {
                printf("%lld: bad dereference\n", line);
                exit(-1);
            }
            *++text = (expr_type == CHAR) ? LC : LI;
        }
        else if (token == And)
        {
            // get the address of: the operand's code ends with a load
            // (LC/LI); dropping that load leaves the address in `ax`
            match(And);
            expression(Inc);
            if (*text == LC || *text == LI) {
                text--;
            } else {
                printf("%lld: bad address of\n", line);
                exit(-1);
            }
            expr_type = expr_type + PTR;
        }
        else if (token == '!')
        {
            // not
            match('!');
            expression(Inc);
            // emit code, use <expr> == 0
            *++text = PUSH;
            *++text = IMM;
            *++text = 0;
            *++text = EQ;
            expr_type = INT;
        }
        else if (token == '~')
        {
            // bitwise not
            match('~');
            expression(Inc);
            // emit code, use <expr> XOR -1
            *++text = PUSH;
            *++text = IMM;
            *++text = -1;
            *++text = XOR;
            expr_type = INT;
        }
        else if (token == Add)
        {
            // +var, do nothing
            match(Add);
            expression(Inc);
            expr_type = INT;
        }
        else if (token == Sub)
        {
            // -var
            match(Sub);
            if (token == Num) {
                // negative literal: fold the sign into the immediate
                *++text = IMM;
                *++text = -token_val;
                match(Num);
            } else {
                // general case: compute (-1) * <expr>
                *++text = IMM;
                *++text = -1;
                *++text = PUSH;
                expression(Inc);
                *++text = MUL;
            }
            expr_type = INT;
        }
        else if (token == Inc || token == Dec)
        {
            // prefix ++ / --
            tmp = token;
            match(token);
            expression(Inc);
            if (*text == LC) {
                *text = PUSH; // to duplicate the address
                *++text = LC;
            } else if (*text == LI) {
                *text = PUSH;
                *++text = LI;
            } else {
                printf("%lld: bad lvalue of pre-increment\n", line);
                exit(-1);
            }
            *++text = PUSH;
            *++text = IMM;
            // pointers other than `char *` step by sizeof(int)
            *++text = (expr_type > PTR) ? sizeof(int) : sizeof(char);
            *++text = (tmp == Inc) ? ADD : SUB;
            *++text = (expr_type == CHAR) ? SC : SI;
        }
        else
        {
            printf("%lld: bad expression\n", line);
            exit(-1);
        }
    }

    // ---- part 2: binary operator and postfix operators ----
    {
        // Consume operators whose precedence is at least `level`. For a
        // left-associative operator the right operand is parsed with the
        // next-higher precedence level.
        while (token >= level) {
            // handle according to current operator's precedence
            tmp = expr_type; // type of the left-hand side
            if (token == Assign) {
                // var = expr;
                match(Assign);
                if (*text == LC || *text == LI) {
                    *text = PUSH; // save the lvalue's pointer
                } else {
                    printf("%lld: bad lvalue in assignment\n", line);
                    exit(-1);
                }
                expression(Assign); // right-associative
                expr_type = tmp;
                *++text = (expr_type == CHAR) ? SC : SI;
            }
            else if (token == Cond) {
                // expr ? a : b;
                match(Cond);
                *++text = JZ;
                addr = ++text;
                expression(Assign);
                if (token == ':') {
                    match(':');
                } else {
                    printf("%lld: missing colon in conditional\n", line);
                    exit(-1);
                }
                // JZ target: just past the two-cell `JMP <addr>` emitted next
                *addr = (int)(text + 3);
                *++text = JMP;
                addr = ++text;
                expression(Cond);
                *addr = (int)(text + 1);
            }
            else if (token == Lor) {
                // logic or: skip the right operand when the left is non-zero
                match(Lor);
                *++text = JNZ;
                addr = ++text;
                expression(Lan);
                *addr = (int)(text + 1);
                expr_type = INT;
            }
            else if (token == Lan) {
                // logic and: skip the right operand when the left is zero
                match(Lan);
                *++text = JZ;
                addr = ++text;
                expression(Or);
                *addr = (int)(text + 1);
                expr_type = INT;
            }
            else if (token == Or)  {match(Or); *++text = PUSH;expression(Xor);*++text = OR; expr_type = INT;}
            else if (token == Xor) {match(Xor);*++text = PUSH;expression(And);*++text = XOR;expr_type = INT;}
            else if (token == And) {match(And);*++text = PUSH;expression(Eq); *++text = AND;expr_type = INT;}
            // `==` and `!=` share one precedence level and associate to the
            // left, so both parse their right operand at level Lt.
            else if (token == Eq)  {match(Eq); *++text = PUSH;expression(Lt); *++text = EQ; expr_type = INT;}
            else if (token == Ne)  {match(Ne); *++text = PUSH;expression(Lt); *++text = NE; expr_type = INT;}
            else if (token == Lt)  {match(Lt); *++text = PUSH;expression(Shl);*++text = LT; expr_type = INT;}
            else if (token == Gt)  {match(Gt); *++text = PUSH;expression(Shl);*++text = GT; expr_type = INT;}
            else if (token == Le)  {match(Le); *++text = PUSH;expression(Shl);*++text = LE; expr_type = INT;}
            else if (token == Ge)  {match(Ge); *++text = PUSH;expression(Shl);*++text = GE; expr_type = INT;}
            else if (token == Shl) {match(Shl);*++text = PUSH;expression(Add);*++text = SHL;expr_type = INT;}
            else if (token == Shr) {match(Shr);*++text = PUSH;expression(Add);*++text = SHR;expr_type = INT;}
            else if (token == Add) {
                match(Add);
                *++text = PUSH;
                expression(Mul);
                expr_type = tmp;
                if (expr_type > PTR)
                {
                    // pointer type, and not `char *`: scale the offset
                    *++text = PUSH;
                    *++text = IMM;
                    *++text = sizeof(int);
                    *++text = MUL;
                }
                *++text = ADD;
            }
            else if (token == Sub) {
                match(Sub);
                *++text = PUSH;
                expression(Mul);
                if (tmp > PTR && tmp == expr_type) {
                    // pointer subtraction: byte difference / element size
                    *++text = SUB;
                    *++text = PUSH;
                    *++text = IMM;
                    *++text = sizeof(int);
                    *++text = DIV;
                    expr_type = INT;
                } else if (tmp > PTR) {
                    // pointer movement: scale the offset first
                    *++text = PUSH;
                    *++text = IMM;
                    *++text = sizeof(int);
                    *++text = MUL;
                    *++text = SUB;
                    expr_type = tmp;
                } else {
                    // numeral subtraction
                    *++text = SUB;
                    expr_type = tmp;
                }
            }
            else if (token == Mul) {match(Mul);*++text = PUSH;expression(Inc);*++text = MUL;expr_type = tmp;}
            else if (token == Div) {match(Div);*++text = PUSH;expression(Inc);*++text = DIV;expr_type = tmp;}
            else if (token == Mod) {match(Mod);*++text = PUSH;expression(Inc);*++text = MOD;expr_type = tmp;}
            else if (token == Inc || token == Dec) {
                // postfix inc(++) and dec(--)
                // we will increase the value to the variable and decrease it
                // on `ax` to get its original value.
                if (*text == LI) {
                    *text = PUSH;
                    *++text = LI;
                }
                else if (*text == LC) {
                    *text = PUSH;
                    *++text = LC;
                }
                else {
                    printf("%lld: bad value in increment\n", line);
                    exit(-1);
                }
                // store the updated value ...
                *++text = PUSH;
                *++text = IMM;
                *++text = (expr_type > PTR) ? sizeof(int) : sizeof(char);
                *++text = (token == Inc) ? ADD : SUB;
                *++text = (expr_type == CHAR) ? SC : SI;
                // ... then undo the step on `ax` only
                *++text = PUSH;
                *++text = IMM;
                *++text = (expr_type > PTR) ? sizeof(int) : sizeof(char);
                *++text = (token == Inc) ? SUB : ADD;
                match(token);
            }
            else if (token == Brak) {
                // array access var[xx]
                match(Brak);
                *++text = PUSH;
                expression(Assign);
                match(']');
                if (tmp > PTR) {
                    // pointer, `not char *`: scale the index
                    *++text = PUSH;
                    *++text = IMM;
                    *++text = sizeof(int);
                    *++text = MUL;
                }
                else if (tmp < PTR) {
                    printf("%lld: pointer type expected\n", line);
                    exit(-1);
                }
                expr_type = tmp - PTR;
                *++text = ADD;
                *++text = (expr_type == CHAR) ? LC : LI;
            }
            else {
                printf("%lld: compiler error, token = %lld\n", line, token);
                exit(-1);
            }
        }
    }
}
/*
 * Parse a single statement and append the VM code for it to `text`.
 *
 * The statement kind is selected by the current token:
 *   1. if (...) <statement> [else <statement>]
 *   2. while (...) <statement>
 *   3. { <statement> ... }
 *   4. return [expression];
 *   5. ;                      (empty statement)
 *   6. expression;            (anything else)
 *
 * Nested statements are handled by recursion.
 */
void statement() {
    int *loop_head; // first instruction of a while condition (backward jump target)
    int *patch;     // operand slot of a forward jump, filled in once the target is known

    switch (token) {
    case If:
        // Generated layout:
        //
        //   if (...)            <cond>
        //                       JZ a
        //     <statement>       <statement>
        //   else:               JMP b
        //   a:                a:
        //     <statement>       <statement>
        //   b:                b:
        match(If);
        match('(');
        expression(Assign); // condition
        match(')');

        *++text = JZ;
        patch = ++text;

        statement(); // "then" branch

        if (token == Else) {
            match(Else);

            // The pending JZ must land just past the two-cell `JMP b`
            // that is emitted next.
            *patch = (int)(text + 3);
            *++text = JMP;
            patch = ++text;

            statement(); // "else" branch
        }

        *patch = (int)(text + 1);
        return;

    case While:
        // Generated layout:
        //
        //   a:                  a:
        //     while (<cond>)      <cond>
        //                         JZ b
        //       <statement>       <statement>
        //                         JMP a
        //   b:                  b:
        match(While);

        loop_head = text + 1;

        match('(');
        expression(Assign);
        match(')');

        *++text = JZ;
        patch = ++text;

        statement(); // loop body

        *++text = JMP;
        *++text = (int)loop_head;
        *patch = (int)(text + 1);
        return;

    case '{':
        // { <statement> ... }
        match('{');
        while (token != '}') {
            statement();
        }
        match('}');
        return;

    case Return:
        // return [expression];
        match(Return);
        if (token != ';') {
            // a value is being returned: compile its expression
            expression(Assign);
        }
        match(';');

        // emit code for return
        *++text = LEV;
        return;

    case ';':
        // empty statement
        match(';');
        return;

    default:
        // a = b; or function_call();
        expression(Assign);
        match(';');
        return;
    }
}
//解析函数的形参列表
void function_parameter() {
int type;
int params; //形参个数
params = 0;
while (token != ')') {
// int name, ...
type = INT;
//注意这里小写的是token的类型,大写的是变量的类型
if (token == Int) {
match(Int);
} else if (token == Char) {
type = CHAR;
match(Char);
}
// pointer type 这里是解析是否为指针类型