From b1e61398a66d74b20f29cc46273dd3e84964588c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C5=A1per=20Dobrovoljc?= Date: Tue, 18 Mar 2025 19:24:19 +0100 Subject: [PATCH] WIP SynAn --- grammar.txt | 62 +++++++++ prg/Makefile | 2 +- prg/test.pins | 18 +-- src/pins25/common/Token.java | 240 ++++++++++++--------------------- src/pins25/phase/LexAn.java | 38 +++--- src/pins25/phase/SynAn.java | 249 +++++++++++++++++++++++++++++++++++ 6 files changed, 418 insertions(+), 191 deletions(-) create mode 100644 grammar.txt create mode 100644 src/pins25/phase/SynAn.java diff --git a/grammar.txt b/grammar.txt new file mode 100644 index 0000000..920531b --- /dev/null +++ b/grammar.txt @@ -0,0 +1,62 @@ +program -> definition restdefs . +restdefs -> definition restdefs | . + +definition -> fun IDENTIFIER LPAREN parameters RPAREN funcassign . +funcassign -> ASSIGN statements | . + +definition -> var IDENTIFIER ASSIGN initializers . + +parameters -> IDENTIFIER restparams | . +restparams -> COMMA parameters | . + +statements -> statement reststmts . +reststmts -> COMMA statements | . + +statement -> expression exprassign . +exprassign -> ASSIGN expression | . + +statement -> if expression then statements elsestmt end . +elsestmt -> else statements | . + +statement -> while expression do statements end . + +statement -> let definition reststmtdefs in statements end . +reststmtdefs -> definition reststmtdefs | . + +expression -> conjexpr restdisj . +restdisj -> OR expression | . + +conjexpr -> cmpexpr restconj . +restconj -> AND conjexpr | . + +cmpexpr -> addexpr restcmp . +restcmp -> cmpop cmpexpr | . +cmpop -> EQU | NEQ | LTH | GTH | LEQ | GEQ . + +addexpr -> multexpr restadd . +restadd -> addop addexpr | . +addop -> ADD | SUB . + +multexpr -> prefixexpr restmult . +restmult -> multop multexpr | . +multop -> MUL | DIV | MOD . + +prefixexpr -> prefixop postfixexpr . +prefixop -> NOT prefixop | ADD prefixop | SUB prefixop | PTR prefixop | . + +postfixexpr -> primary postfixop . 
+postfixop -> PTR postfixop | . + +primary -> const | LPAREN expression RPAREN | IDENTIFIER exprargs . +exprargs -> LPAREN arguments RPAREN | . + +arguments -> expression restargs | . +restargs -> COMMA expression restargs | . + +initializers -> initializer restinits | . +restinits -> COMMA initializers | . + +initializer -> INTCONST intconstmul | CHARCONST | STRINGCONST . +intconstmul -> MUL const | . + +const -> INTCONST | CHARCONST | STRINGCONST . \ No newline at end of file diff --git a/prg/Makefile b/prg/Makefile index b8ef6b2..0ac3283 100644 --- a/prg/Makefile +++ b/prg/Makefile @@ -2,5 +2,5 @@ JAVA = java --enable-preview .PHONY : % % : %.pins25 - $(JAVA) -classpath ../bin pins25.phase.LexAn $< + $(JAVA) -classpath ../bin pins25.phase.SynAn $< diff --git a/prg/test.pins b/prg/test.pins index 50c9cdf..4fbe95f 100644 --- a/prg/test.pins +++ b/prg/test.pins @@ -1,16 +1,2 @@ -1234567890 -= , && || ! == != > < >= <= + - * / % ^ ( ) -=,&&||!==!=><>=<=+-*/%^() -_a_0_ A0312 asd312 a_132asdnmznkjNAJDKNSJKDF -fun var if then else while do let in end -_fun_ _var_ _if_ _then_ _else_ _while_ _do_ _let_ _in_ _end_ -infun invar inif inthen inelse inwhile indo inlet inin inend -funin varin ifin thenin elsein whilein doin letin endin -// == * 1248129 d8as90d8as9d asdkal // -1234567890='a' // 1+2 --1/asd*'x'+"" -'a' '\'' '\\' '\0a' "\\ 1231 \n \ff _-ads" -"BESEDNJAKKKK" "0\" 0123" 0=XYZ 123+3+2+1(SSS) 4_bese - - -dica beseder_3456 \ No newline at end of file +fun test(a, b,,) = + var c = 0 \ No newline at end of file diff --git a/src/pins25/common/Token.java b/src/pins25/common/Token.java index 80ba735..8735449 100644 --- a/src/pins25/common/Token.java +++ b/src/pins25/common/Token.java @@ -1,168 +1,98 @@ package pins25.common; -import java.util.HashMap; - /** * Leksikalni simbol. - * + * * @param location Lokacija simbola v izvornem programu. * @param symbol Vrsta simbola. * @param lexeme Znakovna predstavitev simbola. 
*/ public record Token(Report.Location location, Symbol symbol, String lexeme) implements Report.Locatable { - /** - * Vrste leksikalnih simbolov. - */ - public enum Symbol { - /** - * Konec datoteke. - */ - EOF, - /** - * Stevilo. - */ - INTCONST, - /** - * Znak. - */ - CHARCONST, - /** - * Niz znakov. - */ - STRINGCONST, - /** - * Ime. - */ - IDENTIFIER, - /** - * Kljucna beseda {@code fun}. - */ - FUN, - /** - * Kljucna beseda {@code var}. - */ - VAR, - /** - * Kljucna beseda {@code if}. - */ - IF, - /** - * Kljucna beseda {@code then}. - */ - THEN, - /** - * Kljucna beseda {@code else}. - */ - ELSE, - /** - * Kljucna beseda {@code while}. - */ - WHILE, - /** - * Kljucna beseda {@code do}. - */ - DO, - /** - * Kljucna beseda {@code let}. - */ - LET, - /** - * Kljucna beseda {@code in}. - */ - IN, - /** - * Kljucna beseda {@code end}. - */ - END, - /** - * Simbol {@code =}. - */ - ASSIGN, - /** - * Simbol {@code ,}. - */ - COMMA, - /** - * Simbol {@code &&}. - */ - AND, - /** - * Simbol {@code ||}. - */ - OR, - /** - * Simbol {@code !}. - */ - NOT, - /** - * Simbol {@code ==}. - */ - EQU, - /** - * Simbol {@code !=}. - */ - NEQ, - /** - * Simbol {@code >}. - */ - GTH, - /** - * Simbol {@code <}. - */ - LTH, - /** - * Simbol {@code >=}. - */ - GEQ, - /** - * Simbol {@code <=}. - */ - LEQ, - /** - * Simbol {@code +}. - */ - ADD, - /** - * Simbol {@code -}. - */ - SUB, - /** - * Simbol {@code *}. - */ - MUL, - /** - * Simbol {@code /}. - */ - DIV, - /** - * Simbol {@code %}. - */ - MOD, - /** - * Simbol {@code ^}. - */ - PTR, - /** - * Simbol {@code (}. - */ - LPAREN, - /** - * Simbol {@code )}. - */ - RPAREN, - } + /** + * Vrste leksikalnih simbolov. + */ + public enum Symbol { + /** Konec datoteke. */ + EOF, + /** Stevilo. */ + INTCONST, + /** Znak. */ + CHARCONST, + /** Niz znakov. */ + STRINGCONST, + /** Ime. */ + IDENTIFIER, + /** Kljucna beseda {@code fun}. */ + FUN, + /** Kljucna beseda {@code var}. */ + VAR, + /** Kljucna beseda {@code if}. 
*/ + IF, + /** Kljucna beseda {@code then}. */ + THEN, + /** Kljucna beseda {@code else}. */ + ELSE, + /** Kljucna beseda {@code while}. */ + WHILE, + /** Kljucna beseda {@code do}. */ + DO, + /** Kljucna beseda {@code let}. */ + LET, + /** Kljucna beseda {@code in}. */ + IN, + /** Kljucna beseda {@code end}. */ + END, + /** Simbol {@code =}. */ + ASSIGN, + /** Simbol {@code ,}. */ + COMMA, + /** Simbol {@code &&}. */ + AND, + /** Simbol {@code ||}. */ + OR, + /** Simbol {@code !}. */ + NOT, + /** Simbol {@code ==}. */ + EQU, + /** Simbol {@code !=}. */ + NEQ, + /** Simbol {@code >}. */ + GTH, + /** Simbol {@code <}. */ + LTH, + /** Simbol {@code >=}. */ + GEQ, + /** Simbol {@code <=}. */ + LEQ, + /** Simbol {@code +}. */ + ADD, + /** Simbol {@code -}. */ + SUB, + /** Simbol {@code *}. */ + MUL, + /** Simbol {@code /}. */ + DIV, + /** Simbol {@code %}. */ + MOD, + /** Simbol {@code ^}. */ + PTR, + /** Simbol {@code (}. */ + LPAREN, + /** Simbol {@code )}. */ + RPAREN, + } - @Override - public String toString() { - String lexeme = switch (symbol) { - case INTCONST -> "(" + this.lexeme + ")"; - case CHARCONST -> "(" + this.lexeme + ")"; - case STRINGCONST -> "(" + this.lexeme + ")"; - case IDENTIFIER -> "(" + this.lexeme + ")"; - default -> ""; - }; - return location + " " + symbol + lexeme; - } + @Override + public String toString() { + String lexeme = switch (symbol) { + case INTCONST -> "(" + this.lexeme + ")"; + case CHARCONST -> "(" + this.lexeme + ")"; + case STRINGCONST -> "(" + this.lexeme + ")"; + case IDENTIFIER -> "(" + this.lexeme + ")"; + default -> ""; + }; + return location + " " + symbol + lexeme; + } } \ No newline at end of file diff --git a/src/pins25/phase/LexAn.java b/src/pins25/phase/LexAn.java index 76d8deb..e9e44f8 100644 --- a/src/pins25/phase/LexAn.java +++ b/src/pins25/phase/LexAn.java @@ -147,17 +147,17 @@ public class LexAn implements AutoCloseable { buffToken = new Token( new Report.Location(start, currentLocation()), Token.Symbol.EQU, - 
null + "==" ); nextChar(); return; } - buffToken = new Token(start, Token.Symbol.ASSIGN, null); + buffToken = new Token(start, Token.Symbol.ASSIGN, "="); return; case ',': - buffToken = new Token(currentLocation(), Token.Symbol.COMMA, null); + buffToken = new Token(currentLocation(), Token.Symbol.COMMA, ","); nextChar(); return; @@ -166,7 +166,7 @@ public class LexAn implements AutoCloseable { if (buffChar != '&') { throw new Report.Error(currentLocation(), "Invalid character '" + (char) buffChar + "'"); } - buffToken = new Token(new Report.Location(start, currentLocation()), Token.Symbol.AND, null); + buffToken = new Token(new Report.Location(start, currentLocation()), Token.Symbol.AND, "&&"); nextChar(); return; @@ -175,7 +175,7 @@ public class LexAn implements AutoCloseable { if (buffChar != '|') { throw new Report.Error(currentLocation(), "Invalid character '" + (char) buffChar + "'"); } - buffToken = new Token(new Report.Location(start, currentLocation()), Token.Symbol.OR, null); + buffToken = new Token(new Report.Location(start, currentLocation()), Token.Symbol.OR, "||"); nextChar(); return; @@ -185,13 +185,13 @@ public class LexAn implements AutoCloseable { buffToken = new Token( new Report.Location(start, currentLocation()), Token.Symbol.NEQ, - null + "!=" ); nextChar(); return; } - buffToken = new Token(start, Token.Symbol.NOT, null); + buffToken = new Token(start, Token.Symbol.NOT, "!"); return; case '>': @@ -200,13 +200,13 @@ public class LexAn implements AutoCloseable { buffToken = new Token( new Report.Location(start, currentLocation()), Token.Symbol.GEQ, - null + ">=" ); nextChar(); return; } - buffToken = new Token(start, Token.Symbol.GTH, null); + buffToken = new Token(start, Token.Symbol.GTH, ">"); return; case '<': @@ -215,27 +215,27 @@ public class LexAn implements AutoCloseable { buffToken = new Token( new Report.Location(start, currentLocation()), Token.Symbol.LEQ, - null + "<=" ); nextChar(); return; } - buffToken = new Token(start, 
Token.Symbol.LTH, null); + buffToken = new Token(start, Token.Symbol.LTH, "<"); return; case '+': - buffToken = new Token(currentLocation(), Token.Symbol.ADD, null); + buffToken = new Token(currentLocation(), Token.Symbol.ADD, "+"); nextChar(); return; case '-': - buffToken = new Token(currentLocation(), Token.Symbol.SUB, null); + buffToken = new Token(currentLocation(), Token.Symbol.SUB, "-"); nextChar(); return; case '*': - buffToken = new Token(currentLocation(), Token.Symbol.MUL, null); + buffToken = new Token(currentLocation(), Token.Symbol.MUL, "*"); nextChar(); return; @@ -243,7 +243,7 @@ public class LexAn implements AutoCloseable { start = currentLocation(); nextChar(); if (buffChar != '/') { - buffToken = new Token(start, Token.Symbol.DIV, null); + buffToken = new Token(start, Token.Symbol.DIV, "/"); return; } @@ -255,22 +255,22 @@ public class LexAn implements AutoCloseable { return; case '%': - buffToken = new Token(currentLocation(), Token.Symbol.MOD, null); + buffToken = new Token(currentLocation(), Token.Symbol.MOD, "%"); nextChar(); return; case '^': - buffToken = new Token(currentLocation(), Token.Symbol.PTR, null); + buffToken = new Token(currentLocation(), Token.Symbol.PTR, "^"); nextChar(); return; case '(': - buffToken = new Token(currentLocation(), Token.Symbol.LPAREN, null); + buffToken = new Token(currentLocation(), Token.Symbol.LPAREN, "("); nextChar(); return; case ')': - buffToken = new Token(currentLocation(), Token.Symbol.RPAREN, null); + buffToken = new Token(currentLocation(), Token.Symbol.RPAREN, ")"); nextChar(); return; } diff --git a/src/pins25/phase/SynAn.java b/src/pins25/phase/SynAn.java new file mode 100644 index 0000000..8eca340 --- /dev/null +++ b/src/pins25/phase/SynAn.java @@ -0,0 +1,249 @@ +package pins25.phase; + +import pins25.common.*; + +import java.util.Objects; + +/** + * Sintaksni analizator. + */ +public class SynAn implements AutoCloseable { + + /** + * Leksikalni analizator. 
+ */ + private final LexAn lexAn; + + /** + * Ustvari nov sintaksni analizator. + * + * @param srcFileName Ime izvorne datoteke. + */ + public SynAn(final String srcFileName) { + this.lexAn = new LexAn(srcFileName); + } + + @Override + public void close() { + lexAn.close(); + } + + /** + * Prevzame leksikalni simbol od leksikalnega analizatorja in preveri, ali + * je prave vrste. + * + * @param symbol Pričakovana vrsta leksikalnega simbola. + * @return Prevzeti leksikalni simbol. + */ + private Token check(Token.Symbol symbol) { + final Token token = lexAn.takeToken(); + if (token.symbol() != symbol) + throw new Report.Error(token, "Unexpected symbol '" + token.lexeme() + "', expected " + symbol + "."); + return token; + } + + /** + * Opravi sintaksno analizo. + */ + public void parse() { + parseProgram(); + + if (lexAn.peekToken().symbol() != Token.Symbol.EOF) + Report.warning(lexAn.peekToken(), + "Unexpected text '" + lexAn.peekToken().lexeme() + "...' at the end of the program."); + } + + private void parseProgram() { + parseDefinition(); + parseRestDefinitions(); + } + + private void parseRestDefinitions() { + if (lexAn.peekToken().symbol() != Token.Symbol.EOF) { + parseDefinition(); + parseRestDefinitions(); + } + } + + private void parseDefinition() { + Token token = lexAn.peekToken(); + switch (token.symbol()) { + case Token.Symbol.FUN: + check(Token.Symbol.FUN); + check(Token.Symbol.IDENTIFIER); + check(Token.Symbol.LPAREN); + parseParameters(); + check(Token.Symbol.RPAREN); + parseFunctionAssignment(); + break; + + case Token.Symbol.VAR: + check(Token.Symbol.VAR); + check(Token.Symbol.IDENTIFIER); + check(Token.Symbol.ASSIGN); + parseInitializers(); + break; + + default: + throw new Report.Error(token, "Unexpected symbol '" + token.lexeme() + "', expected FUN or VAR."); + } + } + + private void parseFunctionAssignment() { + Token token = lexAn.peekToken(); + switch (token.symbol()) { + case Token.Symbol.FUN: + case Token.Symbol.VAR: + case 
Token.Symbol.IN: + case Token.Symbol.EOF: + break; + + case Token.Symbol.ASSIGN: + check(Token.Symbol.ASSIGN); + parseStatements(); + break; + + default: + throw new Report.Error(token, "Unexpected symbol '" + token.lexeme() + "', expected ASSIGN."); + } + } + + private void parseParameters() { + Token token = lexAn.peekToken(); + switch (token.symbol()) { + case Token.Symbol.IDENTIFIER: + check(Token.Symbol.IDENTIFIER); + parseRestParameters(); + break; + + case Token.Symbol.RPAREN: + break; + + default: + throw new Report.Error(token, "Unexpected symbol '" + token.lexeme() + "', expected IDENTIFIER or RPAREN."); + } + } + + private void parseRestParameters() { + Token token = lexAn.peekToken(); + switch (token.symbol()) { + case Token.Symbol.RPAREN: + break; + + case Token.Symbol.COMMA: + check(Token.Symbol.COMMA); + check(Token.Symbol.IDENTIFIER); + parseRestParameters(); + break; + + default: + throw new Report.Error(token, "Unexpected symbol '" + token.lexeme() + "', expected ')' or ','."); + } + } + + private void parseStatements() { + parseStatement(); + parseRestStatements(); + } + + private void parseStatement() { + Token token = lexAn.peekToken(); + switch (token.symbol()) { + case Token.Symbol.IDENTIFIER: + case Token.Symbol.LPAREN: + case Token.Symbol.ADD: + case Token.Symbol.SUB: + case Token.Symbol.NOT: + case Token.Symbol.PTR: + case Token.Symbol.INTCONST: + case Token.Symbol.CHARCONST: + case Token.Symbol.STRINGCONST: + parseExpression(); + parseExpressionAssign(); + break; + + case Token.Symbol.IF: + check(Token.Symbol.IF); + parseExpression(); + check(Token.Symbol.THEN); + parseStatements(); + parseElseStatement(); + check(Token.Symbol.END); + break; + + case Token.Symbol.WHILE: + check(Token.Symbol.WHILE); + parseExpression(); + check(Token.Symbol.DO); + parseStatements(); + check(Token.Symbol.END); + break; + + case Token.Symbol.LET: + check(Token.Symbol.LET); + parseDefinition(); + parseRestStatementDefinitions(); + check(Token.Symbol.IN); + 
parseStatements(); + check(Token.Symbol.END); + break; + } + } + + private void parseRestStatements() { + if (lexAn.peekToken().symbol() == Token.Symbol.COMMA) { + check(Token.Symbol.COMMA); + parseStatements(); + } + } + + private void parseRestStatementDefinitions() { + + } + + private void parseInitializers() { + + } + + private void parseExpression() { + } + + private void parseExpressionAssign() { + } + + private void parseElseStatement() { + } + + // --- ZAGON --- + + /** + * Zagon sintaksnega analizatorja kot samostojnega programa. + * + * @param cmdLineArgs Argumenti v ukazni vrstici. + */ + public static void main(final String[] cmdLineArgs) { + System.out.println("This is PINS'25 compiler (syntax analysis):"); + + try { + if (cmdLineArgs.length == 0) + throw new Report.Error("No source file specified in the command line."); + if (cmdLineArgs.length > 1) + Report.warning("Unused arguments in the command line."); + + try (SynAn synAn = new SynAn(cmdLineArgs[0])) { + synAn.parse(); + } + + // Upajmo, da kdaj pridemo to te tocke. + // A zavedajmo se sledecega: + // 1. Prevod je zaradi napak v programu lahko napacen :-o + // 2. Izvorni program se zdalec ni tisto, kar je programer hotel, da bi bil ;-) + Report.info("Done."); + } catch (Report.Error error) { + // Izpis opisa napake. + System.err.println(error.getMessage()); + System.exit(1); + } + } + +}