diff --git a/.idea/codeStyles/codeStyleConfig.xml b/.idea/codeStyles/codeStyleConfig.xml new file mode 100644 index 0000000..a55e7a1 --- /dev/null +++ b/.idea/codeStyles/codeStyleConfig.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/prg/test.pins b/prg/test.pins index 251f915..bf201c5 100644 --- a/prg/test.pins +++ b/prg/test.pins @@ -1 +1,2 @@ -= , && || ! == != > < >= <= + - * / % ^ ( ) \ No newline at end of file +var test = "1234" // hello world +var something = 33 \ No newline at end of file diff --git a/src/pins25/common/Token.java b/src/pins25/common/Token.java index 8735449..80ba735 100644 --- a/src/pins25/common/Token.java +++ b/src/pins25/common/Token.java @@ -1,98 +1,168 @@ package pins25.common; +import java.util.HashMap; + /** * Leksikalni simbol. - * + * * @param location Lokacija simbola v izvornem programu. * @param symbol Vrsta simbola. * @param lexeme Znakovna predstavitev simbola. */ public record Token(Report.Location location, Symbol symbol, String lexeme) implements Report.Locatable { - /** - * Vrste leksikalnih simbolov. - */ - public enum Symbol { - /** Konec datoteke. */ - EOF, - /** Stevilo. */ - INTCONST, - /** Znak. */ - CHARCONST, - /** Niz znakov. */ - STRINGCONST, - /** Ime. */ - IDENTIFIER, - /** Kljucna beseda {@code fun}. */ - FUN, - /** Kljucna beseda {@code var}. */ - VAR, - /** Kljucna beseda {@code if}. */ - IF, - /** Kljucna beseda {@code then}. */ - THEN, - /** Kljucna beseda {@code else}. */ - ELSE, - /** Kljucna beseda {@code while}. */ - WHILE, - /** Kljucna beseda {@code do}. */ - DO, - /** Kljucna beseda {@code let}. */ - LET, - /** Kljucna beseda {@code in}. */ - IN, - /** Kljucna beseda {@code end}. */ - END, - /** Simbol {@code =}. */ - ASSIGN, - /** Simbol {@code ,}. */ - COMMA, - /** Simbol {@code &&}. */ - AND, - /** Simbol {@code ||}. */ - OR, - /** Simbol {@code !}. */ - NOT, - /** Simbol {@code ==}. */ - EQU, - /** Simbol {@code !=}. */ - NEQ, - /** Simbol {@code >}. */ - GTH, - /** Simbol {@code <}. */ - LTH, - /** Simbol {@code >=}. */ - GEQ, - /** Simbol {@code <=}. */ - LEQ, - /** Simbol {@code +}. */ - ADD, - /** Simbol {@code -}. */ - SUB, - /** Simbol {@code *}. */ - MUL, - /** Simbol {@code /}. */ - DIV, - /** Simbol {@code %}. */ - MOD, - /** Simbol {@code ^}. */ - PTR, - /** Simbol {@code (}. */ - LPAREN, - /** Simbol {@code )}. */ - RPAREN, - } + /** + * Vrste leksikalnih simbolov. + */ + public enum Symbol { + /** + * Konec datoteke. + */ + EOF, + /** + * Stevilo. + */ + INTCONST, + /** + * Znak. + */ + CHARCONST, + /** + * Niz znakov. + */ + STRINGCONST, + /** + * Ime. + */ + IDENTIFIER, + /** + * Kljucna beseda {@code fun}. + */ + FUN, + /** + * Kljucna beseda {@code var}. + */ + VAR, + /** + * Kljucna beseda {@code if}. + */ + IF, + /** + * Kljucna beseda {@code then}. + */ + THEN, + /** + * Kljucna beseda {@code else}. + */ + ELSE, + /** + * Kljucna beseda {@code while}. + */ + WHILE, + /** + * Kljucna beseda {@code do}. + */ + DO, + /** + * Kljucna beseda {@code let}. + */ + LET, + /** + * Kljucna beseda {@code in}. + */ + IN, + /** + * Kljucna beseda {@code end}. + */ + END, + /** + * Simbol {@code =}. + */ + ASSIGN, + /** + * Simbol {@code ,}. + */ + COMMA, + /** + * Simbol {@code &&}. + */ + AND, + /** + * Simbol {@code ||}. + */ + OR, + /** + * Simbol {@code !}. + */ + NOT, + /** + * Simbol {@code ==}. + */ + EQU, + /** + * Simbol {@code !=}. + */ + NEQ, + /** + * Simbol {@code >}. + */ + GTH, + /** + * Simbol {@code <}. + */ + LTH, + /** + * Simbol {@code >=}. + */ + GEQ, + /** + * Simbol {@code <=}. + */ + LEQ, + /** + * Simbol {@code +}. + */ + ADD, + /** + * Simbol {@code -}. + */ + SUB, + /** + * Simbol {@code *}. + */ + MUL, + /** + * Simbol {@code /}. + */ + DIV, + /** + * Simbol {@code %}. + */ + MOD, + /** + * Simbol {@code ^}. + */ + PTR, + /** + * Simbol {@code (}. + */ + LPAREN, + /** + * Simbol {@code )}. + */ + RPAREN, + } - @Override - public String toString() { - String lexeme = switch (symbol) { - case INTCONST -> "(" + this.lexeme + ")"; - case CHARCONST -> "(" + this.lexeme + ")"; - case STRINGCONST -> "(" + this.lexeme + ")"; - case IDENTIFIER -> "(" + this.lexeme + ")"; - default -> ""; - }; - return location + " " + symbol + lexeme; - } + @Override + public String toString() { + String lexeme = switch (symbol) { + case INTCONST -> "(" + this.lexeme + ")"; + case CHARCONST -> "(" + this.lexeme + ")"; + case STRINGCONST -> "(" + this.lexeme + ")"; + case IDENTIFIER -> "(" + this.lexeme + ")"; + default -> ""; + }; + return location + " " + symbol + lexeme; + } } \ No newline at end of file diff --git a/src/pins25/phase/LexAn.java b/src/pins25/phase/LexAn.java index a586c3a..853d72b 100644 --- a/src/pins25/phase/LexAn.java +++ b/src/pins25/phase/LexAn.java @@ -92,7 +92,7 @@ public class LexAn implements AutoCloseable { return; case '\t': // Prejsnji znak je tabulator, ta znak je morda potisnjen v desno. buffChar = srcFile.read(); - while (buffCharColumn % 8 != 0) + while (buffCharColumn % 4 != 0) buffCharColumn += 1; buffCharColumn += 1; return; @@ -130,7 +130,7 @@ public class LexAn implements AutoCloseable { Report.Location start = currentLocation(); switch (buffChar) { case -1: // EOF - buffToken = new Token(currentLocation(), Token.Symbol.EOF, null); + buffToken = new Token(new Report.Location(0, 0), Token.Symbol.EOF, null); return; case '\'': @@ -154,7 +154,6 @@ public class LexAn implements AutoCloseable { } buffToken = new Token(start, Token.Symbol.ASSIGN, null); - nextChar(); return; case ',': @@ -241,8 +240,17 @@ public class LexAn implements AutoCloseable { return; case '/': - buffToken = new Token(currentLocation(), Token.Symbol.DIV, null); nextChar(); + if (buffChar != '/') { + buffToken = new Token(currentLocation(), Token.Symbol.DIV, null); + return; + } + + while (buffChar != '\n') { + nextChar(); + } + + nextToken(); return; case '%': @@ -266,15 +274,19 @@ public class LexAn implements AutoCloseable { return; } - if (buffChar >= '0' && buffChar <= '9') { + if (isNumeric()) { intConst(); return; } + if (isAlpha()) { + identifier(); + return; + } throw new Report.Error(currentLocation(), "Unrecognized character '" + (char) buffChar + "'."); } - private boolean isDigit() { + private boolean isNumeric() { return buffChar >= '0' && buffChar <= '9'; } @@ -286,6 +298,30 @@ public class LexAn implements AutoCloseable { return buffChar >= '0' && buffChar <= '9' || buffChar >= 'a' && buffChar <= 'f'; } + private boolean isAlpha() { + return buffChar >= 'a' && buffChar <= 'z' || buffChar >= 'A' && buffChar <= 'Z' || buffChar == '_'; + } + + private boolean isAlphaNumeric() { + return buffChar >= 'a' && buffChar <= 'z' || buffChar >= 'A' && buffChar <= 'Z' || buffChar >= '0' && buffChar <= '9' || buffChar == '_'; + } + + private Token.Symbol getReservedWordSymbol(String word) { + return switch (word) { + case "fun" -> Token.Symbol.FUN; + case "var" -> Token.Symbol.VAR; + case "if" -> Token.Symbol.IF; + case "then" -> Token.Symbol.THEN; + case "else" -> Token.Symbol.ELSE; + case "while" -> Token.Symbol.WHILE; + case "do" -> Token.Symbol.DO; + case "let" -> Token.Symbol.LET; + case "in" -> Token.Symbol.IN; + case "end" -> Token.Symbol.END; + default -> null; + }; + } + private void intConst() { Report.Location startLocation = currentLocation(); Report.Location endLocation = currentLocation(); @@ -294,11 +330,11 @@ public class LexAn implements AutoCloseable { if (buffChar == '0') { lexeme.append((char) buffChar); nextChar(); - if (isDigit()) { + if (isNumeric()) { throw new Report.Error(startLocation, "Leading zero is not allowed."); } } else { - while (isDigit()) { + while (isNumeric()) { lexeme.append((char) buffChar); endLocation = currentLocation(); nextChar(); @@ -384,6 +420,29 @@ public class LexAn implements AutoCloseable { nextChar(); } + private void identifier() { + Report.Location startLocation = currentLocation(); + Report.Location endLocation = currentLocation(); + StringBuilder lexeme = new StringBuilder(); + + while (isAlphaNumeric()) { + lexeme.append((char) buffChar); + endLocation = currentLocation(); + nextChar(); + } + + Token.Symbol symbol = getReservedWordSymbol(lexeme.toString()); + if (symbol == null) { + symbol = Token.Symbol.IDENTIFIER; + } + + buffToken = new Token( + new Report.Location(startLocation, endLocation), + symbol, + lexeme.toString() + ); + } + /** * Vrne trenutni leksikalni simbol, ki ostane v lastnistvu leksikalnega * analizatorja.