Skip to content

Commit 10f2452

Browse files
committed
merge lexical branch
1 parent 845857f commit 10f2452

File tree

14 files changed

+280
-33
lines changed

14 files changed

+280
-33
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,7 @@ modules.order
5050
Module.symvers
5151
Mkfile.old
5252
dkms.conf
53+
54+
obj/*
55+
bin/*
56+
.vscode

Makefile

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Toolchain and directory layout. The two included sub-Makefiles read these
# variables, so they must be set before the `include` lines.
CC=g++
SRC=src
BIN=bin
OBJ=obj
ARGS=-g
TEST=tests
DB=gdb
# Input file passed to the binaries by run-lexer, run-file and debug-file.
FILE=$(TEST)/test.classic

# These targets name actions rather than files; declaring them phony keeps a
# stray file called e.g. `run` or `clean` from silently disabling them.
.PHONY: run-lexer debug-file debug run-file run clean

include src/lexical/Makefile
include tests/Makefile

# Run the lexer on $(FILE).
run-lexer: $(BIN)/lexer
	$< $(FILE)

# NOTE(review): no rule in this Makefile or the two included ones builds
# $(TEST)/bin/main, so the four targets below only work if that binary is
# produced some other way -- confirm.
debug-file: $(TEST)/bin/main
	clear
	$(DB) --args $< $(FILE)

debug: $(TEST)/bin/main
	clear
	$(DB) $<

run-file: $(TEST)/bin/main
	clear
	$< $(FILE)

run: $(TEST)/bin/main
	clear
	$<

# Objects built from the C sources under $(SRC)/utils.
$(OBJ)/%.o: $(SRC)/utils/%.c
	$(CC) $(ARGS) -c $< -o $@

# Remove build products and the generated scanner. The `*` globs do not match
# dotfiles, so obj/.placeholder and bin/.placeholder are left in place.
clean:
	rm -rf $(OBJ)/* $(BIN)/* $(SRC)/lexical/lex.yy.c $(SRC)/lexical/lex.h

bin/.placeholder

Whitespace-only changes.

obj/.placeholder

Whitespace-only changes.

src/lexical/Makefile

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# `lexer` is a convenience alias for the lexer binary, not a file on disk.
.PHONY: lexer
lexer: $(BIN)/lexer

# Link the lexer. util.o is produced by the $(SRC)/utils pattern rule in the
# top-level Makefile.
$(BIN)/lexer: $(OBJ)/scan.o $(OBJ)/lex.yy.o $(OBJ)/util.o $(OBJ)/tokens.o
	$(CC) $(ARGS) $^ -o $@

# Generate the scanner source from the lex specification.
$(SRC)/lexical/lex.yy.c: $(SRC)/lexical/classic.lex
	lex -v -o $@ $<

# Header prerequisites: rebuild the objects when the headers they include
# change. These lines add prerequisites only; the recipes (and `$<`) still
# come from the pattern rules below.
$(OBJ)/scan.o: $(SRC)/lexical/scan.h $(SRC)/lexical/tokens.h
$(OBJ)/lex.yy.o $(OBJ)/tokens.o: $(SRC)/lexical/tokens.h

$(OBJ)/%.o: $(SRC)/lexical/%.cpp
	$(CC) $(ARGS) -c $< -o $@

$(OBJ)/%.o: $(SRC)/lexical/%.c
	$(CC) $(ARGS) -c $< -o $@

src/lexical/classic.lex

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
%{
2+
#include <string.h>
3+
#include <stdlib.h>
4+
#include <stdio.h>
5+
6+
#include "tokens.h"
7+
8+
/* Start position of the most recently matched text; defined in scan.h. */
extern int pos;

/* Position at which the next match will start; the first character is 1. */
int _pos = 1;

/*
 * End-of-input hook called by the generated scanner. Resets the running
 * position and returns 1, which lex interprets as "no more input".
 */
extern "C" int yywrap(void)
{
    _pos = 1;
    return 1;
}

/*
 * Called first in every rule action: publishes where the current match
 * started through `pos`, then advances the running position past it.
 */
void adjust(void)
{
    const int start = _pos;
    pos = start;
    _pos = start + yyleng;
}
22+
23+
%}
24+
25+
id [a-zA-Z][a-zA-Z0-9]*
26+
int [-]?[0-9]+
27+
double [-+]?[0-9]+\.?[0-9]*
28+
29+
%Start COMMENT
30+
%%
31+
32+
<INITIAL>nota {adjust(); yylval = Token(Lexeme::NOTA); return 1;}
33+
<INITIAL>adesm {adjust(); yylval = Token(Lexeme::ADESM); return 1;}
34+
<INITIAL>desm {adjust(); yylval = Token(Lexeme::DESM); return 1;}
35+
<INITIAL>int {adjust(); yylval = Token(Lexeme::INT); return 1;}
36+
<INITIAL>dupl {adjust(); yylval = Token(Lexeme::DUPL); return 1;}
37+
<INITIAL>series {adjust(); yylval = Token(Lexeme::SERIES); return 1;}
38+
<INITIAL>exemp {adjust(); yylval = Token(Lexeme::EXEMP); return 1;}
39+
<INITIAL>oper {adjust(); yylval = Token(Lexeme::OPER); return 1;}
40+
<INITIAL>anef {adjust(); yylval = Token(Lexeme::ANEF); return 1;}
41+
<INITIAL>return {adjust(); yylval = Token(Lexeme::RETURN); return 1;}
42+
43+
<INITIAL>{id} {adjust(); yylval = Token(Lexeme::ID, yytext); return 1;}
44+
<INITIAL>{int} {adjust(); yylval = Token(Lexeme::NUM_I, yytext); return 1;}
45+
<INITIAL>{double} {adjust(); yylval = Token(Lexeme::NUM_D, yytext); return 1;}
46+
47+
<INITIAL>"+" {adjust(); yylval = Token(Lexeme::PLUS); return 1;}
48+
<INITIAL>"-" {adjust(); yylval = Token(Lexeme::MINUS); return 1;}
49+
<INITIAL>"*" {adjust(); yylval = Token(Lexeme::TIMES); return 1;}
50+
<INITIAL>"/" {adjust(); yylval = Token(Lexeme::DIV); return 1;}
51+
<INITIAL>"\\" {adjust(); yylval = Token(Lexeme::BACKSLASH); return 1;}
52+
<INITIAL>"(" {adjust(); yylval = Token(Lexeme::LPAREN); return 1;}
53+
<INITIAL>")" {adjust(); yylval = Token(Lexeme::RPAREN); return 1;}
54+
<INITIAL>"{" {adjust(); yylval = Token(Lexeme::LBRACK); return 1;}
55+
<INITIAL>"}" {adjust(); yylval = Token(Lexeme::RBRACK); return 1;}
56+
<INITIAL>"." {adjust(); yylval = Token(Lexeme::DOT); return 1;}
57+
<INITIAL>"\"" {adjust(); yylval = Token(Lexeme::DQUOTE); return 1;}
58+
<INITIAL>"=" {adjust(); yylval = Token(Lexeme::EQUAL); return 1;}
59+
<INITIAL>";" {adjust(); yylval = Token(Lexeme::SEMICOLON); return 1;}
60+
<INITIAL>":" {adjust(); yylval = Token(Lexeme::COLON); return 1;}
61+
<INITIAL>"," {adjust(); yylval = Token(Lexeme::COMMA); return 1;}
62+
63+
<INITIAL>" " {adjust(); continue;}
64+
<INITIAL>\n {adjust(); continue;}
65+
<INITIAL>\t {adjust(); continue;}
66+
<INITIAL>. {adjust(); fprintf(stderr, "illegal token");}
67+
68+
<INITIAL>"/*" {adjust(); BEGIN COMMENT;}
69+
<COMMENT>"*/" {adjust(); BEGIN INITIAL;}
70+
<COMMENT>. {adjust();}

src/lexical/scan.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#include "scan.h"
2+
3+
4+
int main(int argc, char **argv) {
5+
if (argc != 2) {
6+
fprintf(stderr, "usage: ./a.out filename\n");
7+
exit(1);
8+
}
9+
10+
yyin = fopen(argv[1], "r");
11+
int i;
12+
13+
14+
for (;;) {
15+
i = yylex();
16+
if (i == 0) break;
17+
std::cout << yylval.lexeme_str << " " << pos << " " << yylval.semantval() << "\n";
18+
}
19+
return 0;
20+
}

src/lexical/scan.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#pragma once
2+
3+
#include <unordered_map>
4+
#include <string>
5+
#include <iostream>
6+
7+
#include "tokens.h"
8+
9+
using namespace std;
10+
11+
// Input stream read by the generated scanner; main() in scan.cpp assigns it
// from fopen().
extern FILE *yyin;
12+
13+
// Token produced by the most recent successful rule action in classic.lex.
// NOTE(review): this is a definition, not a declaration (tokens.h carries the
// matching `extern Token yylval;`). Including scan.h from more than one
// translation unit would define yylval twice -- confirm that only scan.cpp
// includes this header.
Token yylval;
14+
15+
// Start position of the most recent match; written by adjust() in classic.lex.
// NOTE(review): also a definition, so the same single-include caveat applies.
int pos = 0;
16+
17+
int yylex(void); /* prototype for the lexing function */

src/lexical/tokens.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#include "tokens.h"
2+
3+
std::string Lexeme::semantic[] = {Lexeme::ID, Lexeme::NUM_I, Lexeme::NUM_D, Lexeme::STR};
4+
5+
std::string Token::semantval() {
6+
if (std::find(std::begin(Lexeme::semantic), std::end(Lexeme::semantic), lexeme_str) != std::end(Lexeme::semantic)) {
7+
return semant_str;
8+
}
9+
return "";
10+
}

src/lexical/tokens.h

Lines changed: 59 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,63 @@
11
#pragma once
22

3+
#include <string>
4+
#include <algorithm>
5+
36
#include "../utils/util.h"
47

5-
typedef union {
6-
int pos;
7-
int ival;
8-
str sval;
9-
} YYSTYPE;
10-
extern YYSTYPE yylval;
11-
12-
13-
# define PLUS 257 // +
14-
# define MINUS 258 // -
15-
# define TIMES 259 // *
16-
# define DIV 260 // /
17-
# define BACKSLASH 261 /* \ */
18-
# define LPAREN 262 2// ()
19-
# define RPAREN 263 // )
20-
# define LBRACK 264 // {
21-
# define RBRACK 265 // }
22-
# define DOT 266 // .
23-
# define DQUOTE 267 // "
24-
# define EQUAL 268 // =
25-
# define SEMICOLON 269 // ;
26-
# define COLON 270 // :
27-
# define ID 271
28-
# define NUM 272
29-
# define NOTA 273
30-
# define ADESM 274
31-
# define DESM 275
32-
# define INT 276
33-
# define SERIES 277
34-
# define EXEMP 278
35-
# define OPER 279
36-
# define ANEF 280
8+
using namespace std;
9+
10+
11+
// Pseudo enumeration of lexemes: each kind is a named string constant whose
// value is its own spelling.
class Lexeme {
public:
    // Operators and punctuation.
    static inline const std::string PLUS{"PLUS"};
    static inline const std::string MINUS{"MINUS"};
    static inline const std::string TIMES{"TIMES"};
    static inline const std::string DIV{"DIV"};
    static inline const std::string BACKSLASH{"BACKSLASH"};
    static inline const std::string LPAREN{"LPAREN"};
    static inline const std::string RPAREN{"RPAREN"};
    static inline const std::string LBRACK{"LBRACK"};
    static inline const std::string RBRACK{"RBRACK"};
    static inline const std::string DOT{"DOT"};
    static inline const std::string DQUOTE{"DQUOTE"};
    static inline const std::string EQUAL{"EQUAL"};
    static inline const std::string SEMICOLON{"SEMICOLON"};
    static inline const std::string COLON{"COLON"};
    static inline const std::string COMMA{"COMMA"};

    // Identifiers and literals.
    static inline const std::string ID{"ID"};
    static inline const std::string NUM_I{"NUM_I"};
    static inline const std::string NUM_D{"NUM_D"};
    static inline const std::string STR{"STR"};

    // Keywords.
    static inline const std::string NOTA{"NOTA"};
    static inline const std::string ADESM{"ADESM"};
    static inline const std::string DESM{"DESM"};
    static inline const std::string INT{"INT"};
    static inline const std::string DUPL{"DUPL"};
    static inline const std::string SERIES{"SERIES"};
    static inline const std::string EXEMP{"EXEMP"};
    static inline const std::string OPER{"OPER"};
    static inline const std::string ANEF{"ANEF"};
    static inline const std::string RETURN{"RETURN"};

    // Lexeme kinds that carry a semantic value; defined in tokens.cpp.
    static std::string semantic[];
};
46+
47+
48+
// A scanned token: the name of its lexeme kind plus an optional semantic
// string (the constructors in classic.lex pass the matched text for
// identifiers and numbers).
class Token {
public:
    // Name of the lexeme kind, e.g. one of the Lexeme constants.
    std::string lexeme_str;

    // Implemented in tokens.cpp: returns the semantic string only when
    // lexeme_str is listed in Lexeme::semantic, otherwise an empty string.
    std::string semantval();

    // Builds a token from a lexeme name and its semantic string. Members are
    // initialized directly rather than default-constructed and then assigned.
    Token(std::string lexeme, std::string semant)
        : lexeme_str(lexeme), semant_str(semant) {}

    // Builds a token with an empty semantic string.
    Token(std::string lexeme) : Token(lexeme, "") {}

    // Builds an empty token (empty lexeme name, empty semantic string).
    Token() : Token("", "") {}

private:
    // Raw semantic string; exposed only through semantval().
    std::string semant_str;
};
61+
62+
63+
extern Token yylval;

src/utils/util.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ void *mmalloc(size_t size) {
1010
}
1111

1212
str string(char *s) {
13-
str p = mmalloc(strlen(s)+1);
13+
str p = (str)mmalloc(strlen(s)+1);
1414
strcpy(p,s);
1515
return p;
1616
}

tests/Makefile

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
2+
# `test-tokens` builds and runs the Token unit tests; it is not a file.
.PHONY: test-tokens
test-tokens: $(BIN)/test_tokens
	$<

# Link the unit-test binary. gtest_main is listed before gtest because it
# depends on it (order matters with static archives), and -pthread is added
# because GoogleTest uses threads on POSIX platforms.
$(BIN)/test_tokens: $(OBJ)/tokens.o $(OBJ)/test_tokens.o
	$(CC) $(ARGS) $^ -o $@ -lgtest_main -lgtest -pthread

$(OBJ)/%.o: $(TEST)/units/%.cpp
	$(CC) $(ARGS) -c $< -o $@

tests/test.classic

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
oper: int myfunc(int a, int b) {
2+
return a + b;
3+
}
4+
5+
exemp: MyType {
6+
int field1;
7+
series field2;
8+
anef meth1();
9+
}
10+
11+
MyType\anef meth1() {
12+
return 2.3439;
13+
}

tests/units/test_tokens.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#include <gtest/gtest.h>
2+
3+
#include "../../src/lexical/tokens.h"
4+
5+
// A default-constructed token has an empty lexeme name and no semantic value.
// EXPECT_EQ is used so a failure prints both operands.
TEST(TokenTest, testinit_no_args) {
    Token t = Token();
    EXPECT_EQ(t.lexeme_str, "");
    EXPECT_EQ(t.semantval(), "");
}
10+
11+
// Constructing from a lexeme name alone leaves the semantic value empty.
// EXPECT_EQ is used so a failure prints both operands.
TEST(TokenTest, testinit_lexeme) {
    Token t = Token("ID");
    EXPECT_EQ(t.lexeme_str, "ID");
    EXPECT_EQ(t.semantval(), "");
}
16+
17+
// An ID token reports the semantic string it was constructed with.
// EXPECT_EQ is used so a failure prints both operands.
TEST(TokenTest, testinit_all_args) {
    Token t = Token("ID", "myvar");
    EXPECT_EQ(t.lexeme_str, "ID");
    EXPECT_EQ(t.semantval(), "myvar");
}
22+
23+
// A lexeme kind that is not listed in Lexeme::semantic (here ANEF) reports an
// empty semantic value even when one was supplied at construction.
TEST(TokenTest, testsemantval) {
    Token t = Token("ANEF", "value");
    EXPECT_EQ(t.semantval(), "");
}

0 commit comments

Comments
 (0)