1 Star 0 Fork 0

yenmuse/c4

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
c4.c 20.20 KB
一键复制 编辑 原始数据 按行查看 历史
rswier 提交于 2014-12-19 03:09 . void support
// c4.c - C in four functions
// char, int, and pointer types
// if, while, return, and expression statements
// just enough features to allow self-compilation and a bit more
// Written by Robert Swierczek
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <unistd.h>
#ifdef _WIN32
#include "w32.h"
#endif
char *p, *lp, // current position in source code
*data; // data/bss pointer
int *e, *le, // current position in emitted code
*id, // currently parsed identifier
*sym, // symbol table (simple list of identifiers)
tk, // current token
ival, // current token value
ty, // current expression type
loc, // local variable offset
line, // current line number
src, // print source and assembly flag
debug; // print executed instructions
// tokens and classes (operators last and in precedence order)
enum {
Num = 128, Fun, Sys, Glo, Loc, Id,
Char, Else, Enum, If, Int, Return, Sizeof, While,
Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak
};
// opcodes
enum { LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ,
OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,
OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,MCPY,MMAP,DSYM,QSRT,EXIT };
// types
enum { CHAR, INT, PTR };
// identifier offsets (since we can't create an ident struct)
enum { Tk, Hash, Name, Class, Type, Val, HClass, HType, HVal, Idsz };
void next()
{
char *pp;
while (tk = *p) {
++p;
if (tk == '\n') {
if (src) {
printf("%d: %.*s", line, p - lp, lp);
lp = p;
while (le < e) {
printf("%8.4s", &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ,"
"OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,"
"OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,MCPY,MMAP,DSYM,QSRT,EXIT,"[*++le * 5]);
if (*le <= ADJ) printf(" %d\n", *++le); else printf("\n");
}
}
++line;
}
else if (tk == '#') {
while (*p != 0 && *p != '\n') ++p;
}
else if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') {
pp = p - 1;
while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9') || *p == '_')
tk = tk * 147 + *p++;
tk = (tk << 6) + (p - pp);
id = sym;
while (id[Tk]) {
if (tk == id[Hash] && !memcmp((char *)id[Name], pp, p - pp)) { tk = id[Tk]; return; }
id = id + Idsz;
}
id[Name] = (int)pp;
id[Hash] = tk;
tk = id[Tk] = Id;
return;
}
else if (tk >= '0' && tk <= '9') {
if (ival = tk - '0') { while (*p >= '0' && *p <= '9') ival = ival * 10 + *p++ - '0'; }
else if (*p == 'x' || *p == 'X') {
while ((tk = *++p) && ((tk >= '0' && tk <= '9') || (tk >= 'a' && tk <= 'f') || (tk >= 'A' && tk <= 'F')))
ival = ival * 16 + (tk & 15) + (tk >= 'A' ? 9 : 0);
}
else { while (*p >= '0' && *p <= '7') ival = ival * 8 + *p++ - '0'; }
tk = Num;
return;
}
else if (tk == '/') {
if (*p == '/') {
++p;
while (*p != 0 && *p != '\n') ++p;
}
else {
tk = Div;
return;
}
}
else if (tk == '\'' || tk == '"') {
pp = data;
while (*p != 0 && *p != tk) {
if ((ival = *p++) == '\\') {
if ((ival = *p++) == 'n') ival = '\n';
}
if (tk == '"') *data++ = ival;
}
++p;
if (tk == '"') ival = (int)pp; else tk = Num;
return;
}
else if (tk == '=') { if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return; }
else if (tk == '+') { if (*p == '+') { ++p; tk = Inc; } else tk = Add; return; }
else if (tk == '-') { if (*p == '-') { ++p; tk = Dec; } else tk = Sub; return; }
else if (tk == '!') { if (*p == '=') { ++p; tk = Ne; } return; }
else if (tk == '<') { if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return; }
else if (tk == '>') { if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return; }
else if (tk == '|') { if (*p == '|') { ++p; tk = Lor; } else tk = Or; return; }
else if (tk == '&') { if (*p == '&') { ++p; tk = Lan; } else tk = And; return; }
else if (tk == '^') { tk = Xor; return; }
else if (tk == '%') { tk = Mod; return; }
else if (tk == '*') { tk = Mul; return; }
else if (tk == '[') { tk = Brak; return; }
else if (tk == '?') { tk = Cond; return; }
else if (tk == '~' || tk == ';' || tk == '{' || tk == '}' || tk == '(' || tk == ')' || tk == ']' || tk == ',' || tk == ':') return;
}
}
void expr(int lev)
{
int t, *d;
if (!tk) { printf("%d: unexpected eof in expression\n", line); exit(-1); }
else if (tk == Num) { *++e = IMM; *++e = ival; next(); ty = INT; }
else if (tk == '"') {
*++e = IMM; *++e = ival; next();
while (tk == '"') next();
data = (char *)((int)data + sizeof(int) & -sizeof(int)); ty = PTR;
}
else if (tk == Sizeof) {
next(); if (tk == '(') next(); else { printf("%d: open paren expected in sizeof\n", line); exit(-1); }
ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; }
while (tk == Mul) { next(); ty = ty + PTR; }
if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); }
*++e = IMM; *++e = (ty == CHAR) ? sizeof(char) : sizeof(int);
ty = INT;
}
else if (tk == Id) {
d = id; next();
if (tk == '(') {
next();
t = 0;
while (tk != ')') { expr(Assign); *++e = PSH; ++t; if (tk == ',') next(); }
next();
if (d[Class] == Sys) *++e = d[Val];
else if (d[Class] == Fun) { *++e = JSR; *++e = d[Val]; }
else { printf("%d: bad function call\n", line); exit(-1); }
if (t) { *++e = ADJ; *++e = t; }
ty = d[Type];
}
else if (d[Class] == Num) { *++e = IMM; *++e = d[Val]; ty = INT; }
else {
if (d[Class] == Loc) { *++e = LEA; *++e = loc - d[Val]; }
else if (d[Class] == Glo) { *++e = IMM; *++e = d[Val]; }
else { printf("%d: undefined variable\n", line); exit(-1); }
*++e = ((ty = d[Type]) == CHAR) ? LC : LI;
}
}
else if (tk == '(') {
next();
if (tk == Int || tk == Char) {
t = (tk == Int) ? INT : CHAR; next();
while (tk == Mul) { next(); t = t + PTR; }
if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); }
expr(Inc);
ty = t;
}
else {
expr(Assign);
if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }
}
}
else if (tk == Mul) {
next(); expr(Inc);
if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); }
*++e = (ty == CHAR) ? LC : LI;
}
else if (tk == And) {
next(); expr(Inc);
if (*e == LC || *e == LI) --e; else { printf("%d: bad address-of\n", line); exit(-1); }
ty = ty + PTR;
}
else if (tk == '!') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = 0; *++e = EQ; ty = INT; }
else if (tk == '~') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = -1; *++e = XOR; ty = INT; }
else if (tk == Add) { next(); expr(Inc); ty = INT; }
else if (tk == Sub) {
next(); *++e = IMM;
if (tk == Num) { *++e = -ival; next(); } else { *++e = -1; *++e = PSH; expr(Inc); *++e = MUL; }
ty = INT;
}
else if (tk == Inc || tk == Dec) {
t = tk; next(); expr(Inc);
if (*e == LC) { *e = PSH; *++e = LC; }
else if (*e == LI) { *e = PSH; *++e = LI; }
else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); }
*++e = PSH;
*++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);
*++e = (t == Inc) ? ADD : SUB;
*++e = (ty == CHAR) ? SC : SI;
}
else { printf("%d: bad expression\n", line); exit(-1); }
while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method
t = ty;
if (tk == Assign) {
next();
if (*e == LC || *e == LI) *e = PSH; else { printf("%d: bad lvalue in assignment\n", line); exit(-1); }
expr(Assign); *++e = ((ty = t) == CHAR) ? SC : SI;
}
else if (tk == Cond) {
next();
*++e = BZ; d = ++e;
expr(Assign);
if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); }
*d = (int)(e + 3); *++e = JMP; d = ++e;
expr(Cond);
*d = (int)(e + 1);
}
else if (tk == Lor) { next(); *++e = BNZ; d = ++e; expr(Lan); *d = (int)(e + 1); ty = INT; }
else if (tk == Lan) { next(); *++e = BZ; d = ++e; expr(Or); *d = (int)(e + 1); ty = INT; }
else if (tk == Or) { next(); *++e = PSH; expr(Xor); *++e = OR; ty = INT; }
else if (tk == Xor) { next(); *++e = PSH; expr(And); *++e = XOR; ty = INT; }
else if (tk == And) { next(); *++e = PSH; expr(Eq); *++e = AND; ty = INT; }
else if (tk == Eq) { next(); *++e = PSH; expr(Lt); *++e = EQ; ty = INT; }
else if (tk == Ne) { next(); *++e = PSH; expr(Lt); *++e = NE; ty = INT; }
else if (tk == Lt) { next(); *++e = PSH; expr(Shl); *++e = LT; ty = INT; }
else if (tk == Gt) { next(); *++e = PSH; expr(Shl); *++e = GT; ty = INT; }
else if (tk == Le) { next(); *++e = PSH; expr(Shl); *++e = LE; ty = INT; }
else if (tk == Ge) { next(); *++e = PSH; expr(Shl); *++e = GE; ty = INT; }
else if (tk == Shl) { next(); *++e = PSH; expr(Add); *++e = SHL; ty = INT; }
else if (tk == Shr) { next(); *++e = PSH; expr(Add); *++e = SHR; ty = INT; }
else if (tk == Add) {
next(); *++e = PSH; expr(Mul);
if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; }
*++e = ADD;
}
else if (tk == Sub) {
next(); *++e = PSH; expr(Mul);
if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; }
*++e = SUB;
}
else if (tk == Mul) { next(); *++e = PSH; expr(Inc); *++e = MUL; ty = INT; }
else if (tk == Div) { next(); *++e = PSH; expr(Inc); *++e = DIV; ty = INT; }
else if (tk == Mod) { next(); *++e = PSH; expr(Inc); *++e = MOD; ty = INT; }
else if (tk == Inc || tk == Dec) {
if (*e == LC) { *e = PSH; *++e = LC; }
else if (*e == LI) { *e = PSH; *++e = LI; }
else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); }
*++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);
*++e = (tk == Inc) ? ADD : SUB;
*++e = (ty == CHAR) ? SC : SI;
*++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);
*++e = (tk == Inc) ? SUB : ADD;
next();
}
else if (tk == Brak) {
next(); *++e = PSH; expr(Assign);
if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); }
if (t > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; }
else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); }
*++e = ADD;
*++e = ((ty = t - PTR) == CHAR) ? LC : LI;
}
else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); }
}
}
void stmt()
{
int *a, *b;
if (tk == If) {
next();
if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); }
expr(Assign);
if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }
*++e = BZ; b = ++e;
stmt();
if (tk == Else) {
*b = (int)(e + 3); *++e = JMP; b = ++e;
next();
stmt();
}
*b = (int)(e + 1);
}
else if (tk == While) {
next();
a = e + 1;
if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); }
expr(Assign);
if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }
*++e = BZ; b = ++e;
stmt();
*++e = JMP; *++e = (int)a;
*b = (int)(e + 1);
}
else if (tk == Return) {
next();
if (tk != ';') expr(Assign);
*++e = LEV;
if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); }
}
else if (tk == '{') {
next();
while (tk != '}') stmt();
next();
}
else if (tk == ';') {
next();
}
else {
expr(Assign);
if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); }
}
}
int main(int argc, char **argv)
{
int fd, bt, ty, poolsz, *idmain;
int *pc, *sp, *bp, a, cycle; // vm registers
int i, *t; // temps
--argc; ++argv;
if (argc > 0 && **argv == '-' && (*argv)[1] == 's') { src = 1; --argc; ++argv; }
if (argc > 0 && **argv == '-' && (*argv)[1] == 'd') { debug = 1; --argc; ++argv; }
if (argc < 1) { printf("usage: c4 [-s] [-d] file ...\n"); return -1; }
if ((fd = open(*argv, 0)) < 0) { printf("could not open(%s)\n", *argv); return -1; }
poolsz = 256*1024; // arbitrary size
if (!(sym = malloc(poolsz))) { printf("could not malloc(%d) symbol area\n", poolsz); return -1; }
if (!(le = e = malloc(poolsz))) { printf("could not malloc(%d) text area\n", poolsz); return -1; }
if (!(data = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; }
if (!(sp = malloc(poolsz))) { printf("could not malloc(%d) stack area\n", poolsz); return -1; }
memset(sym, 0, poolsz);
memset(e, 0, poolsz);
memset(data, 0, poolsz);
p = "char else enum if int return sizeof while "
"open read close printf malloc memset memcmp memcpy mmap dlsym qsort exit void main";
i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table
i = OPEN; while (i <= EXIT) { next(); id[Class] = Sys; id[Type] = INT; id[Val] = i++; } // add library to symbol table
next(); id[Tk] = Char; // handle void type
next(); idmain = id; // keep track of main
if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%d) source area\n", poolsz); return -1; }
if ((i = read(fd, p, poolsz-1)) <= 0) { printf("read() returned %d\n", i); return -1; }
p[i] = 0;
close(fd);
// parse declarations
line = 1;
next();
while (tk) {
bt = INT; // basetype
if (tk == Int) next();
else if (tk == Char) { next(); bt = CHAR; }
else if (tk == Enum) {
next();
if (tk != '{') next();
if (tk == '{') {
next();
i = 0;
while (tk != '}') {
if (tk != Id) { printf("%d: bad enum identifier %d\n", line, tk); return -1; }
next();
if (tk == Assign) {
next();
if (tk != Num) { printf("%d: bad enum initializer\n", line); return -1; }
i = ival;
next();
}
id[Class] = Num; id[Type] = INT; id[Val] = i++;
if (tk == ',') next();
}
next();
}
}
while (tk != ';' && tk != '}') {
ty = bt;
while (tk == Mul) { next(); ty = ty + PTR; }
if (tk != Id) { printf("%d: bad global declaration\n", line); return -1; }
if (id[Class]) { printf("%d: duplicate global definition\n", line); return -1; }
next();
id[Type] = ty;
if (tk == '(') { // function
id[Class] = Fun;
id[Val] = (int)(e + 1);
next(); i = 0;
while (tk != ')') {
ty = INT;
if (tk == Int) next();
else if (tk == Char) { next(); ty = CHAR; }
while (tk == Mul) { next(); ty = ty + PTR; }
if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; }
if (id[Class] == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; }
id[HClass] = id[Class]; id[Class] = Loc;
id[HType] = id[Type]; id[Type] = ty;
id[HVal] = id[Val]; id[Val] = i++;
next();
if (tk == ',') next();
}
next();
if (tk != '{') { printf("%d: bad function definition\n", line); return -1; }
loc = ++i;
next();
while (tk == Int || tk == Char) {
bt = (tk == Int) ? INT : CHAR;
next();
while (tk != ';') {
ty = bt;
while (tk == Mul) { next(); ty = ty + PTR; }
if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; }
if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; }
id[HClass] = id[Class]; id[Class] = Loc;
id[HType] = id[Type]; id[Type] = ty;
id[HVal] = id[Val]; id[Val] = ++i;
next();
if (tk == ',') next();
}
next();
}
*++e = ENT; *++e = i - loc;
while (tk != '}') stmt();
*++e = LEV;
id = sym; // unwind symbol table locals
while (id[Tk]) {
if (id[Class] == Loc) {
id[Class] = id[HClass];
id[Type] = id[HType];
id[Val] = id[HVal];
}
id = id + Idsz;
}
}
else {
id[Class] = Glo;
id[Val] = (int)data;
data = data + sizeof(int);
}
if (tk == ',') next();
}
next();
}
if (!(pc = (int *)idmain[Val])) { printf("main() not defined\n"); return -1; }
if (src) return 0;
// setup stack
sp = (int *)((int)sp + poolsz);
*--sp = EXIT; // call exit if main returns
*--sp = PSH; t = sp;
*--sp = argc;
*--sp = (int)argv;
*--sp = (int)t;
// run...
cycle = 0;
while (1) {
i = *pc++; ++cycle;
if (debug) {
printf("%d> %.4s", cycle,
&"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ,"
"OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,"
"OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,MCPY,MMAP,DSYM,QSRT,EXIT,"[i * 5]);
if (i <= ADJ) printf(" %d\n", *pc); else printf("\n");
}
if (i == LEA) a = (int)(bp + *pc++); // load local address
else if (i == IMM) a = *pc++; // load global address or immediate
else if (i == JMP) pc = (int *)*pc; // jump
else if (i == JSR) { *--sp = (int)(pc + 1); pc = (int *)*pc; } // jump to subroutine
else if (i == BZ) pc = a ? pc + 1 : (int *)*pc; // branch if zero
else if (i == BNZ) pc = a ? (int *)*pc : pc + 1; // branch if not zero
else if (i == ENT) { *--sp = (int)bp; bp = sp; sp = sp - *pc++; } // enter subroutine
else if (i == ADJ) sp = sp + *pc++; // stack adjust
else if (i == LEV) { sp = bp; bp = (int *)*sp++; pc = (int *)*sp++; } // leave subroutine
else if (i == LI) a = *(int *)a; // load int
else if (i == LC) a = *(char *)a; // load char
else if (i == SI) *(int *)*sp++ = a; // store int
else if (i == SC) a = *(char *)*sp++ = a; // store char
else if (i == PSH) *--sp = a; // push
else if (i == OR) a = *sp++ | a;
else if (i == XOR) a = *sp++ ^ a;
else if (i == AND) a = *sp++ & a;
else if (i == EQ) a = *sp++ == a;
else if (i == NE) a = *sp++ != a;
else if (i == LT) a = *sp++ < a;
else if (i == GT) a = *sp++ > a;
else if (i == LE) a = *sp++ <= a;
else if (i == GE) a = *sp++ >= a;
else if (i == SHL) a = *sp++ << a;
else if (i == SHR) a = *sp++ >> a;
else if (i == ADD) a = *sp++ + a;
else if (i == SUB) a = *sp++ - a;
else if (i == MUL) a = *sp++ * a;
else if (i == DIV) a = *sp++ / a;
else if (i == MOD) a = *sp++ % a;
else if (i == OPEN) a = open((char *)sp[1], *sp);
else if (i == READ) a = read(sp[2], (char *)sp[1], *sp);
else if (i == CLOS) a = close(*sp);
else if (i == PRTF) { t = sp + pc[1]; a = printf((char *)t[-1], t[-2], t[-3], t[-4], t[-5], t[-6]); }
else if (i == MALC) a = (int)malloc(*sp);
else if (i == MSET) a = (int)memset((char *)sp[2], sp[1], *sp);
else if (i == MCMP) a = memcmp((char *)sp[2], (char *)sp[1], *sp);
else if (i == MCPY) a = (int)memcpy((char *)sp[2], (char *)sp[1], *sp);
else if (i == MMAP) a = (int)mmap((char *)sp[5], sp[4], sp[3], sp[2], sp[1], *sp);
else if (i == DSYM) a = (int)dlsym((char *)sp[1], (char *)*sp);
else if (i == QSRT) qsort((char *)sp[3], sp[2], sp[1], (void *)*sp);
else if (i == EXIT) { printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp; }
else { printf("unknown instruction = %d! cycle = %d\n", i, cycle); return -1; }
}
}
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
C
1
https://gitee.com/yenmuse/c4.git
git@gitee.com:yenmuse/c4.git
yenmuse
c4
c4
master

搜索帮助