Skip to content

Commit fa7a434

Browse files
committed
Chapter 2: Parser + AST
0 parents  commit fa7a434

File tree

7 files changed

+266
-0
lines changed

7 files changed

+266
-0
lines changed

_tags

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<{lexer,parser}.ml>: use_camlp4, pp(camlp4of)

ast.ml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
(*===----------------------------------------------------------------------===
2+
* Abstract Syntax Tree (aka Parse Tree)
3+
*===----------------------------------------------------------------------===*)
4+
5+
(* expr - Base type for all expression nodes. *)
6+
type expr =
7+
(* variant for numeric literals like "1.0". *)
8+
| Number of float
9+
(* variant for referencing a variable, like "a". *)
10+
| Variable of string
11+
(* variant for a binary operator. *)
12+
| Binary of char * expr * expr
13+
(* variant for function calls. *)
14+
| Call of string * expr array
15+
16+
(* proto - This type represents the "prototype" for a function, which captures
17+
* its name, and its argument names (thus implicitly the number of arguments the
18+
* function takes). *)
19+
type proto = Prototype of string * string array
20+
21+
(* func - This type represents a function definition itself. *)
22+
type func = Function of proto * expr

lexer.ml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
(*===----------------------------------------------------------------------===
2+
* Lexer
3+
*===----------------------------------------------------------------------===*)
4+
5+
let rec lex = parser
6+
(* Skip any whitespace. *)
7+
| [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
8+
9+
(* identifier: [a-zA-Z][a-zA-Z0-9] *)
10+
| [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
11+
let buffer = Buffer.create 1 in
12+
Buffer.add_char buffer c;
13+
lex_ident buffer stream
14+
15+
(* number: [0-9.]+ *)
16+
| [< ' ('0' .. '9' as c); stream >] ->
17+
let buffer = Buffer.create 1 in
18+
Buffer.add_char buffer c;
19+
lex_number buffer stream
20+
21+
(* Comment until end of line. *)
22+
| [< ' ('#'); stream >] ->
23+
lex_comment stream
24+
25+
(* Otherwise, just return the character as its ascii value. *)
26+
| [< 'c; stream >] ->
27+
[< 'Token.Kwd c; lex stream >]
28+
29+
(* end of stream. *)
30+
| [< >] -> [< >]
31+
32+
and lex_number buffer = parser
33+
| [< ' ('0' .. '9' | '.' as c); stream >] ->
34+
Buffer.add_char buffer c;
35+
lex_number buffer stream
36+
| [< stream=lex >] ->
37+
[< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
38+
39+
and lex_ident buffer = parser
40+
| [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
41+
Buffer.add_char buffer c;
42+
lex_ident buffer stream
43+
| [< stream=lex >] ->
44+
match Buffer.contents buffer with
45+
| "def" -> [< 'Token.Def; stream >]
46+
| "extern" -> [< 'Token.Extern; stream >]
47+
| id -> [< 'Token.Ident id; stream >]
48+
49+
and lex_comment = parser
50+
| [< ' ('\n'); stream=lex >] -> stream
51+
| [< 'c; e=lex_comment >] -> e
52+
| [< >] -> [< >]

parser.ml

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
(*===---------------------------------------------------------------------===
2+
* Parser
3+
*===---------------------------------------------------------------------===*)
4+
5+
(* binop_precedence - This holds the precedence for each binary operator that is
6+
* defined *)
7+
let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
8+
9+
(* precedence - Get the precedence of the pending binary operator token. *)
10+
let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
11+
12+
(* primary
13+
* ::= identifier
14+
* ::= numberexpr
15+
* ::= parenexpr *)
16+
let rec parse_primary = parser
17+
(* numberexpr ::= number *)
18+
| [< 'Token.Number n >] -> Ast.Number n
19+
20+
(* parenexpr ::= '(' expression ')' *)
21+
| [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
22+
23+
(* identifierexpr
24+
* ::= identifier
25+
* ::= identifier '(' argumentexpr ')' *)
26+
| [< 'Token.Ident id; stream >] ->
27+
let rec parse_args accumulator = parser
28+
| [< e=parse_expr; stream >] ->
29+
begin parser
30+
| [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
31+
| [< >] -> e :: accumulator
32+
end stream
33+
| [< >] -> accumulator
34+
in
35+
let rec parse_ident id = parser
36+
(* Call. *)
37+
| [< 'Token.Kwd '(';
38+
args=parse_args [];
39+
'Token.Kwd ')' ?? "expected ')'">] ->
40+
Ast.Call (id, Array.of_list (List.rev args))
41+
42+
(* Simple variable ref. *)
43+
| [< >] -> Ast.Variable id
44+
in
45+
parse_ident id stream
46+
47+
| [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
48+
49+
(* binoprhs
50+
* ::= ('+' primary)* *)
51+
and parse_bin_rhs expr_prec lhs stream =
52+
match Stream.peek stream with
53+
(* If this is a binop, find its precedence. *)
54+
| Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
55+
let token_prec = precedence c in
56+
57+
(* If this is a binop that binds at least as tightly as the current binop,
58+
* consume it, otherwise we are done. *)
59+
if token_prec < expr_prec then lhs else begin
60+
(* Eat the binop. *)
61+
Stream.junk stream;
62+
63+
(* Parse the primary expression after the binary operator. *)
64+
let rhs = parse_primary stream in
65+
66+
(* Okay, we know this is a binop. *)
67+
let rhs =
68+
match Stream.peek stream with
69+
| Some (Token.Kwd c2) ->
70+
(* If BinOp binds less tightly with rhs than the operator after
71+
* rhs, let the pending operator take rhs as its lhs. *)
72+
let next_prec = precedence c2 in
73+
if token_prec < next_prec
74+
then parse_bin_rhs (token_prec + 1) rhs stream
75+
else rhs
76+
| _ -> rhs
77+
in
78+
79+
(* Merge lhs/rhs. *)
80+
let lhs = Ast.Binary (c, lhs, rhs) in
81+
parse_bin_rhs expr_prec lhs stream
82+
end
83+
| _ -> lhs
84+
85+
(* expression
86+
* ::= primary binoprhs *)
87+
and parse_expr = parser
88+
| [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream
89+
90+
(* prototype
91+
* ::= id '(' id* ')' *)
92+
let parse_prototype =
93+
let rec parse_args accumulator = parser
94+
| [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
95+
| [< >] -> accumulator
96+
in
97+
98+
parser
99+
| [< 'Token.Ident id;
100+
'Token.Kwd '(' ?? "expected '(' in prototype";
101+
args=parse_args [];
102+
'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
103+
(* success. *)
104+
Ast.Prototype (id, Array.of_list (List.rev args))
105+
106+
| [< >] ->
107+
raise (Stream.Error "expected function name in prototype")
108+
109+
(* definition ::= 'def' prototype expression *)
110+
let parse_definition = parser
111+
| [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
112+
Ast.Function (p, e)
113+
114+
(* toplevelexpr ::= expression *)
115+
let parse_toplevel = parser
116+
| [< e=parse_expr >] ->
117+
(* Make an anonymous proto. *)
118+
Ast.Function (Ast.Prototype ("", [||]), e)
119+
120+
(* external ::= 'extern' prototype *)
121+
let parse_extern = parser
122+
| [< 'Token.Extern; e=parse_prototype >] -> e

token.ml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
(*===----------------------------------------------------------------------===
2+
* Lexer Tokens
3+
*===----------------------------------------------------------------------===*)
4+
5+
(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
6+
* these others for known things. *)
7+
type token =
8+
(* commands *)
9+
| Def | Extern
10+
(* primary *)
11+
| Ident of string | Number of float
12+
(* unknown *)
13+
| Kwd of char

toplevel.ml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
(*===----------------------------------------------------------------------===
2+
* Top-Level parsing and JIT Driver
3+
*===----------------------------------------------------------------------===*)
4+
5+
(* top ::= definition | external | expression | ';' *)
6+
let rec main_loop stream =
7+
match Stream.peek stream with
8+
| None -> ()
9+
10+
(* ignore top-level semicolons. *)
11+
| Some (Token.Kwd ';') ->
12+
Stream.junk stream;
13+
main_loop stream
14+
15+
| Some token ->
16+
begin
17+
try match token with
18+
| Token.Def ->
19+
ignore(Parser.parse_definition stream);
20+
print_endline "parsed a function definition.";
21+
| Token.Extern ->
22+
ignore(Parser.parse_extern stream);
23+
print_endline "parsed an extern.";
24+
| _ ->
25+
(* Evaluate a top-level expression into an anonymous function. *)
26+
ignore(Parser.parse_toplevel stream);
27+
print_endline "parsed a top-level expr";
28+
with Stream.Error s ->
29+
(* Skip token for error recovery. *)
30+
Stream.junk stream;
31+
print_endline s;
32+
end;
33+
print_string "ready> "; flush stdout;
34+
main_loop stream

toy.ml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
2+
(*===----------------------------------------------------------------------===
3+
* Main driver code.
4+
*===----------------------------------------------------------------------===*)
5+
6+
let main () =
7+
(* Install standard binary operators.
8+
* 1 is the lowest precedence. *)
9+
Hashtbl.add Parser.binop_precedence '<' 10;
10+
Hashtbl.add Parser.binop_precedence '+' 20;
11+
Hashtbl.add Parser.binop_precedence '-' 20;
12+
Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *)
13+
14+
(* Prime the first token. *)
15+
print_string "ready> "; flush stdout;
16+
let stream = Lexer.lex (Stream.of_channel stdin) in
17+
18+
(* Run the main "interpreter loop" now. *)
19+
Toplevel.main_loop stream;
20+
;;
21+
22+
main ()

0 commit comments

Comments
 (0)