merge lexical branch

Maxcode123 · Maxcode123 · commit 10f24521a47c · 2023-05-07T13:53:07.000+03:00
diff --git a/.gitignore b/.gitignore
@@ -50,3 +50,7 @@ modules.order
 Module.symvers
 Mkfile.old
 dkms.conf
+
+obj/*
+bin/*
+.vscode
diff --git a/Makefile b/Makefile
@@ -0,0 +1,54 @@
+# Top-level build file: shared variables, convenience run/debug targets,
+# and the includes that pull in the per-directory rules.
+
+# Toolchain and directory layout used by this file and the included Makefiles.
+CC=g++
+SRC=src
+BIN=bin
+OBJ=obj
+ARGS=-g
+TEST=tests
+DB=gdb
+# Input handed to the *-file and run-lexer targets; override with FILE=<path>.
+FILE=$(TEST)/test.classic
+
+# These targets name actions, not files; declaring them phony keeps make from
+# skipping them when a file or directory with the same name exists.
+.PHONY: run-lexer debug-file debug run-file run clean
+
+
+include src/lexical/Makefile
+include tests/Makefile
+
+
+# Build the lexer if needed and run it on $(FILE).
+run-lexer: $(BIN)/lexer
+	$< $(FILE)
+
+# Start $(DB) on $(TEST)/bin/main with $(FILE) as its argument.
+debug-file: $(TEST)/bin/main
+	clear
+	$(DB) --args $< $(FILE)
+
+# Start $(DB) on $(TEST)/bin/main without arguments.
+debug: $(TEST)/bin/main
+	clear
+	$(DB) $<
+
+# Run $(TEST)/bin/main on $(FILE).
+run-file: $(TEST)/bin/main
+	clear
+	$< $(FILE)
+
+# Run $(TEST)/bin/main without arguments.
+run: $(TEST)/bin/main
+	clear
+	$<
+
+# Compile the utility sources under $(SRC)/utils into $(OBJ).
+$(OBJ)/%.o: $(SRC)/utils/%.c
+	$(CC) $(ARGS) -c $< -o $@
+
+# Remove build outputs and the generated scanner source.
+clean:
+	rm -rf obj/* bin/* src/lexical/lex.yy.c src/lexical/lex.h
diff --git a/bin/.placeholder b/bin/.placeholder
diff --git a/obj/.placeholder b/obj/.placeholder
diff --git a/src/lexical/Makefile b/src/lexical/Makefile
@@ -0,0 +1,23 @@
+# Rules for building the lexer; included from the top-level Makefile, which
+# defines CC, ARGS, SRC, OBJ and BIN.
+
+# `lexer` is a convenience alias, not a file, so it must always be considered.
+.PHONY: lexer
+
+lexer: $(BIN)/lexer
+
+# Link the scanner driver, the generated scanner and their helpers.
+$(BIN)/lexer: $(OBJ)/scan.o $(OBJ)/lex.yy.o $(OBJ)/util.o $(OBJ)/tokens.o
+	$(CC) $(ARGS) $^ -o $@
+
+# Generate the scanner source from the lex specification.
+$(SRC)/lexical/lex.yy.c: $(SRC)/lexical/classic.lex
+	lex -v -o $@ $<
+
+# Compile C++ sources of the lexical module.
+$(OBJ)/%.o: $(SRC)/lexical/%.cpp
+	$(CC) $(ARGS) -c $< -o $@
+
+# Compile C sources of the lexical module (including the generated lex.yy.c).
+$(OBJ)/%.o: $(SRC)/lexical/%.c
+	$(CC) $(ARGS) -c $< -o $@
diff --git a/src/lexical/classic.lex b/src/lexical/classic.lex
@@ -0,0 +1,80 @@
+%{
+/* Scanner specification: turns source text into Token values stored in
+   the global yylval. Every token rule returns 1, so a yylex() result of 0
+   only occurs at end of input. */
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "tokens.h"
+
+/* Start offset of the most recently matched text; defined outside this file. */
+extern int pos;
+/* Offset at which the next match will start; advanced by adjust(). */
+int _pos=1;
+
+/* Called by the scanner at end of input: reset the running offset and
+   return 1 to signal that there is no further input. */
+extern "C" int yywrap(void)
+{
+ _pos=1;
+ return 1;
+}
+
+/* Publish the start offset of the current match in pos and step the
+   running offset past it. Every rule calls this first. */
+void adjust(void)
+{
+ pos=_pos;
+ _pos+=yyleng;
+}
+
+%}
+
+id [a-zA-Z][a-zA-Z0-9]*
+int [-]?[0-9]+
+double [-+]?[0-9]+\.?[0-9]*
+
+%Start COMMENT
+%%
+
+<INITIAL>nota {adjust(); yylval = Token(Lexeme::NOTA); return 1;}
+<INITIAL>adesm {adjust(); yylval = Token(Lexeme::ADESM); return 1;}
+<INITIAL>desm {adjust(); yylval = Token(Lexeme::DESM); return 1;}
+<INITIAL>int {adjust(); yylval = Token(Lexeme::INT); return 1;}
+<INITIAL>dupl {adjust(); yylval = Token(Lexeme::DUPL); return 1;}
+<INITIAL>series {adjust(); yylval = Token(Lexeme::SERIES); return 1;}
+<INITIAL>exemp {adjust(); yylval = Token(Lexeme::EXEMP); return 1;}
+<INITIAL>oper {adjust(); yylval = Token(Lexeme::OPER); return 1;}
+<INITIAL>anef {adjust(); yylval = Token(Lexeme::ANEF); return 1;}
+<INITIAL>return {adjust(); yylval = Token(Lexeme::RETURN); return 1;}
+
+<INITIAL>{id} {adjust(); yylval = Token(Lexeme::ID, yytext); return 1;}
+<INITIAL>{int} {adjust(); yylval = Token(Lexeme::NUM_I, yytext); return 1;}
+<INITIAL>{double} {adjust(); yylval = Token(Lexeme::NUM_D, yytext); return 1;}
+
+<INITIAL>"+" {adjust(); yylval = Token(Lexeme::PLUS); return 1;}
+<INITIAL>"-" {adjust(); yylval = Token(Lexeme::MINUS); return 1;}
+<INITIAL>"*" {adjust(); yylval = Token(Lexeme::TIMES); return 1;}
+<INITIAL>"/" {adjust(); yylval = Token(Lexeme::DIV); return 1;}
+<INITIAL>"\\" {adjust(); yylval = Token(Lexeme::BACKSLASH); return 1;}
+<INITIAL>"(" {adjust(); yylval = Token(Lexeme::LPAREN); return 1;}
+<INITIAL>")" {adjust(); yylval = Token(Lexeme::RPAREN); return 1;}
+<INITIAL>"{" {adjust(); yylval = Token(Lexeme::LBRACK); return 1;}
+<INITIAL>"}" {adjust(); yylval = Token(Lexeme::RBRACK); return 1;}
+<INITIAL>"." {adjust(); yylval = Token(Lexeme::DOT); return 1;}
+<INITIAL>"\"" {adjust(); yylval = Token(Lexeme::DQUOTE); return 1;}
+<INITIAL>"=" {adjust(); yylval = Token(Lexeme::EQUAL); return 1;}
+<INITIAL>";" {adjust(); yylval = Token(Lexeme::SEMICOLON); return 1;}
+<INITIAL>":" {adjust(); yylval = Token(Lexeme::COLON); return 1;}
+<INITIAL>"," {adjust(); yylval = Token(Lexeme::COMMA); return 1;}
+
+<INITIAL>" " {adjust(); continue;}
+<INITIAL>\n	 {adjust(); continue;}
+<INITIAL>\t {adjust(); continue;}
+<INITIAL>.	 {adjust(); fprintf(stderr, "illegal token\n");}
+
+<INITIAL>"/*" {adjust(); BEGIN COMMENT;}
+<COMMENT>"*/" {adjust(); BEGIN INITIAL;}
+<COMMENT>. {adjust();}
+<COMMENT>\n {adjust(); /* the dot rule skips newlines; without this rule they hit the default rule, get echoed and go uncounted */}
diff --git a/src/lexical/scan.cpp b/src/lexical/scan.cpp
@@ -0,0 +1,29 @@
+#include "scan.h"
+
+
+// Lexer driver: scans the file named on the command line and prints one
+// line per token with its lexeme name, start position and semantic value.
+// Exits with status 1 on a usage error or when the file cannot be opened.
+int main(int argc, char **argv) {
+    if (argc != 2) {
+        fprintf(stderr, "usage: %s filename\n", argc > 0 ? argv[0] : "lexer");
+        exit(1);
+    }
+
+    yyin = fopen(argv[1], "r");
+    if (yyin == nullptr) {
+        // Report the failure instead of handing the scanner a null stream
+        // (flex-generated scanners fall back to stdin in that case).
+        perror(argv[1]);
+        exit(1);
+    }
+
+    // yylex() returns 0 at end of input; each non-zero return leaves the
+    // token in yylval and its start offset in pos.
+    while (yylex() != 0) {
+        std::cout << yylval.lexeme_str << " " << pos << " " << yylval.semantval() << "\n";
+    }
+
+    fclose(yyin);
+    return 0;
+}
diff --git a/src/lexical/scan.h b/src/lexical/scan.h
@@ -0,0 +1,25 @@
+#pragma once
+
+// Declarations shared between the scanner driver and the generated scanner.
+
+#include <cstdio>
+#include <iostream>
+#include <string>
+#include <unordered_map>
+
+#include "tokens.h"
+
+using namespace std;
+
+// Input stream read by the generated scanner.
+extern FILE *yyin;
+
+// NOTE(review): yylval and pos are *defined* here, so this header can be
+// included from only one translation unit without duplicate-symbol errors.
+// Token produced by the most recent yylex() call.
+Token yylval;
+
+// Start offset of the most recently matched text (written by the scanner).
+int pos = 0;
+
+int yylex(void); /* prototype for the lexing function */
diff --git a/src/lexical/tokens.cpp b/src/lexical/tokens.cpp
@@ -0,0 +1,15 @@
+#include "tokens.h"
+
+// Lexemes whose tokens carry a semantic value.
+std::string Lexeme::semantic[] = {Lexeme::ID, Lexeme::NUM_I, Lexeme::NUM_D, Lexeme::STR};
+
+// Returns the stored semantic string when this token's lexeme is listed in
+// Lexeme::semantic, and an empty string for every other lexeme.
+std::string Token::semantval() {
+    for (const std::string &carrier : Lexeme::semantic) {
+        if (carrier == lexeme_str) {
+            return semant_str;
+        }
+    }
+    return "";
+}
diff --git a/src/lexical/tokens.h b/src/lexical/tokens.h
@@ -1,36 +1,68 @@
 #pragma once
 
+#include <string>
+#include <algorithm>
+#include <utility>
+
 #include "../utils/util.h"
 
-typedef union {
-    int pos;
-    int ival;
-    str sval;
-} YYSTYPE;
-extern YYSTYPE yylval;
-
-
-# define PLUS 257  // +
-# define MINUS 258  // -
-# define TIMES 259  // *
-# define DIV 260  // /
-# define BACKSLASH 261  /* \ */
-# define LPAREN 262  2// ()
-# define RPAREN 263  // )
-# define LBRACK 264  // {
-# define RBRACK 265  // }
-# define DOT 266  // .
-# define DQUOTE 267  // "
-# define EQUAL 268  // =
-# define SEMICOLON 269  // ;
-# define COLON 270  // :
-# define ID 271  
-# define NUM 272
-# define NOTA 273
-# define ADESM 274
-# define DESM 275
-# define INT 276
-# define SERIES 277
-# define EXEMP 278
-# define OPER 279
-# define ANEF 280
+// NOTE(review): a using-directive in a header leaks into every file that
+// includes it; kept because removing it could break includers.
+using namespace std;
+
+
+// Pseudo enumeration of lexemes: each constant holds its own name as text.
+class Lexeme {
+    public:
+        inline static const std::string PLUS = "PLUS";
+        inline static const std::string MINUS = "MINUS";
+        inline static const std::string TIMES = "TIMES";
+        inline static const std::string DIV = "DIV";
+        inline static const std::string BACKSLASH = "BACKSLASH";
+        inline static const std::string LPAREN = "LPAREN";
+        inline static const std::string RPAREN = "RPAREN";
+        inline static const std::string LBRACK = "LBRACK";
+        inline static const std::string RBRACK = "RBRACK";
+        inline static const std::string DOT = "DOT";
+        inline static const std::string DQUOTE = "DQUOTE";
+        inline static const std::string EQUAL = "EQUAL";
+        inline static const std::string SEMICOLON = "SEMICOLON";
+        inline static const std::string COLON = "COLON";
+        inline static const std::string COMMA = "COMMA";
+        inline static const std::string ID = "ID";
+        inline static const std::string NUM_I = "NUM_I";
+        inline static const std::string NUM_D = "NUM_D";
+        inline static const std::string STR = "STR";
+        inline static const std::string NOTA = "NOTA";
+        inline static const std::string ADESM = "ADESM";
+        inline static const std::string DESM = "DESM";
+        inline static const std::string INT = "INT";
+        inline static const std::string DUPL = "DUPL";
+        inline static const std::string SERIES = "SERIES";
+        inline static const std::string EXEMP = "EXEMP";
+        inline static const std::string OPER = "OPER";
+        inline static const std::string ANEF = "ANEF";
+        inline static const std::string RETURN = "RETURN";
+
+        // Lexemes whose tokens carry a semantic value; defined in tokens.cpp.
+        static std::string semantic[];
+};
+
+
+// A scanned token: the lexeme name plus an optional semantic string.
+class Token {
+    public:
+        std::string lexeme_str;
+        // Semantic string if lexeme_str is listed in Lexeme::semantic, else "".
+        std::string semantval();
+        // The arguments are taken by value and moved into the members.
+        Token(std::string lexeme, std::string semant)
+            : lexeme_str(std::move(lexeme)), semant_str(std::move(semant)) {}
+        Token(std::string lexeme) : Token(std::move(lexeme), "") {}
+        Token() = default;
+    private:
+        std::string semant_str;
+};
+
+
+extern Token yylval;
diff --git a/src/utils/util.c b/src/utils/util.c
@@ -10,7 +10,7 @@ void *mmalloc(size_t size) {
 }
 
 str string(char *s) {
-    str p = mmalloc(strlen(s)+1);
+    str p = (str)mmalloc(strlen(s)+1);
     strcpy(p,s);
     return p;
 }
diff --git a/tests/Makefile b/tests/Makefile
@@ -0,0 +1,18 @@
+# Unit-test rules; included from the top-level Makefile, which defines
+# CC, ARGS, OBJ, BIN and TEST.
+
+# test-tokens names an action, not a file.
+.PHONY: test-tokens
+
+# Build (if needed) and run the Token unit tests.
+test-tokens: $(BIN)/test_tokens
+	$<
+
+# Link against GoogleTest; gtest_main supplies main(), and -pthread is
+# required because GoogleTest uses pthreads where available.
+$(BIN)/test_tokens: $(OBJ)/tokens.o $(OBJ)/test_tokens.o
+	$(CC) $^ -o $@ -lgtest -lgtest_main -pthread
+
+# Compile the unit-test sources under $(TEST)/units into $(OBJ).
+$(OBJ)/%.o: $(TEST)/units/%.cpp
+	$(CC) $(ARGS) -c $< -o $@
diff --git a/tests/test.classic b/tests/test.classic
@@ -0,0 +1,13 @@
+oper: int  myfunc(int a, int b) {
+	return a + b;
+}
+
+exemp: MyType {
+	int field1;
+	series field2;
+	anef meth1();
+}
+
+MyType\anef meth1() {
+	return 2.3439;
+}
diff --git a/tests/units/test_tokens.cpp b/tests/units/test_tokens.cpp
@@ -0,0 +1,30 @@
+#include <gtest/gtest.h>
+
+#include "../../src/lexical/tokens.h"
+
+// A default-constructed token has an empty lexeme and no semantic value.
+TEST(TokenTest, testinit_no_args) {
+    Token t;
+    EXPECT_EQ(t.lexeme_str, "");
+    EXPECT_EQ(t.semantval(), "");
+}
+
+// The semantic value defaults to empty when only a lexeme is given.
+TEST(TokenTest, testinit_lexeme) {
+    Token t("ID");
+    EXPECT_EQ(t.lexeme_str, "ID");
+    EXPECT_EQ(t.semantval(), "");
+}
+
+// A lexeme listed in Lexeme::semantic exposes its semantic value.
+TEST(TokenTest, testinit_all_args) {
+    Token t("ID", "myvar");
+    EXPECT_EQ(t.lexeme_str, "ID");
+    EXPECT_EQ(t.semantval(), "myvar");
+}
+
+// A lexeme not listed in Lexeme::semantic hides its semantic value.
+TEST(TokenTest, testsemantval) {
+    Token t("ANEF", "value");
+    EXPECT_EQ(t.semantval(), "");
+}

Original file line number	Diff line number	Diff line change
`@@ -10,7 +10,7 @@ void *mmalloc(size_t size) {`
`10`	`10`	`}`
`11`	`11`
`12`	`12`	`str string(char *s) {`
`13`		`- str p = mmalloc(strlen(s)+1);`
	`13`	`+ str p = (str)mmalloc(strlen(s)+1);`
`14`	`14`	`strcpy(p,s);`
`15`	`15`	`return p;`
`16`	`16`	`}`