LexAn implementation done

This commit is contained in:
Gašper Dobrovoljc 2025-03-05 09:44:48 +01:00
parent 42fe2bf270
commit 56facf7372
No known key found for this signature in database
GPG Key ID: 0E7E037018CFA5A5
4 changed files with 229 additions and 94 deletions

5
.idea/codeStyles/codeStyleConfig.xml generated Normal file
View File

@ -0,0 +1,5 @@
<component name="ProjectCodeStyleConfiguration">
<state>
<option name="PREFERRED_PROJECT_CODE_STYLE" value="Default" />
</state>
</component>

View File

@ -1 +1,2 @@
= , && || ! == != > < >= <= + - * / % ^ ( ) var test = "1234" // hello world
var something = 33

View File

@ -1,5 +1,7 @@
package pins25.common; package pins25.common;
import java.util.HashMap;
/** /**
* Leksikalni simbol. * Leksikalni simbol.
* *
@ -13,73 +15,141 @@ public record Token(Report.Location location, Symbol symbol, String lexeme) impl
* Vrste leksikalnih simbolov. * Vrste leksikalnih simbolov.
*/ */
public enum Symbol { public enum Symbol {
/** Konec datoteke. */ /**
* Konec datoteke.
*/
EOF, EOF,
/** Stevilo. */ /**
* Stevilo.
*/
INTCONST, INTCONST,
/** Znak. */ /**
* Znak.
*/
CHARCONST, CHARCONST,
/** Niz znakov. */ /**
* Niz znakov.
*/
STRINGCONST, STRINGCONST,
/** Ime. */ /**
* Ime.
*/
IDENTIFIER, IDENTIFIER,
/** Kljucna beseda {@code fun}. */ /**
* Kljucna beseda {@code fun}.
*/
FUN, FUN,
/** Kljucna beseda {@code var}. */ /**
* Kljucna beseda {@code var}.
*/
VAR, VAR,
/** Kljucna beseda {@code if}. */ /**
* Kljucna beseda {@code if}.
*/
IF, IF,
/** Kljucna beseda {@code then}. */ /**
* Kljucna beseda {@code then}.
*/
THEN, THEN,
/** Kljucna beseda {@code else}. */ /**
* Kljucna beseda {@code else}.
*/
ELSE, ELSE,
/** Kljucna beseda {@code while}. */ /**
* Kljucna beseda {@code while}.
*/
WHILE, WHILE,
/** Kljucna beseda {@code do}. */ /**
* Kljucna beseda {@code do}.
*/
DO, DO,
/** Kljucna beseda {@code let}. */ /**
* Kljucna beseda {@code let}.
*/
LET, LET,
/** Kljucna beseda {@code in}. */ /**
* Kljucna beseda {@code in}.
*/
IN, IN,
/** Kljucna beseda {@code end}. */ /**
* Kljucna beseda {@code end}.
*/
END, END,
/** Simbol {@code =}. */ /**
* Simbol {@code =}.
*/
ASSIGN, ASSIGN,
/** Simbol {@code ,}. */ /**
* Simbol {@code ,}.
*/
COMMA, COMMA,
/** Simbol {@code &&}. */ /**
* Simbol {@code &&}.
*/
AND, AND,
/** Simbol {@code ||}. */ /**
* Simbol {@code ||}.
*/
OR, OR,
/** Simbol {@code !}. */ /**
* Simbol {@code !}.
*/
NOT, NOT,
/** Simbol {@code ==}. */ /**
* Simbol {@code ==}.
*/
EQU, EQU,
/** Simbol {@code !=}. */ /**
* Simbol {@code !=}.
*/
NEQ, NEQ,
/** Simbol {@code >}. */ /**
* Simbol {@code >}.
*/
GTH, GTH,
/** Simbol {@code <}. */ /**
* Simbol {@code <}.
*/
LTH, LTH,
/** Simbol {@code >=}. */ /**
* Simbol {@code >=}.
*/
GEQ, GEQ,
/** Simbol {@code <=}. */ /**
* Simbol {@code <=}.
*/
LEQ, LEQ,
/** Simbol {@code +}. */ /**
* Simbol {@code +}.
*/
ADD, ADD,
/** Simbol {@code -}. */ /**
* Simbol {@code -}.
*/
SUB, SUB,
/** Simbol {@code *}. */ /**
* Simbol {@code *}.
*/
MUL, MUL,
/** Simbol {@code /}. */ /**
* Simbol {@code /}.
*/
DIV, DIV,
/** Simbol {@code %}. */ /**
* Simbol {@code %}.
*/
MOD, MOD,
/** Simbol {@code ^}. */ /**
* Simbol {@code ^}.
*/
PTR, PTR,
/** Simbol {@code (}. */ /**
* Simbol {@code (}.
*/
LPAREN, LPAREN,
/** Simbol {@code )}. */ /**
* Simbol {@code )}.
*/
RPAREN, RPAREN,
} }

View File

@ -92,7 +92,7 @@ public class LexAn implements AutoCloseable {
return; return;
case '\t': // Prejsnji znak je tabulator, ta znak je morda potisnjen v desno. case '\t': // Prejsnji znak je tabulator, ta znak je morda potisnjen v desno.
buffChar = srcFile.read(); buffChar = srcFile.read();
while (buffCharColumn % 8 != 0) while (buffCharColumn % 4 != 0)
buffCharColumn += 1; buffCharColumn += 1;
buffCharColumn += 1; buffCharColumn += 1;
return; return;
@ -130,7 +130,7 @@ public class LexAn implements AutoCloseable {
Report.Location start = currentLocation(); Report.Location start = currentLocation();
switch (buffChar) { switch (buffChar) {
case -1: // EOF case -1: // EOF
buffToken = new Token(currentLocation(), Token.Symbol.EOF, null); buffToken = new Token(new Report.Location(0, 0), Token.Symbol.EOF, null);
return; return;
case '\'': case '\'':
@ -154,7 +154,6 @@ public class LexAn implements AutoCloseable {
} }
buffToken = new Token(start, Token.Symbol.ASSIGN, null); buffToken = new Token(start, Token.Symbol.ASSIGN, null);
nextChar();
return; return;
case ',': case ',':
@ -241,8 +240,17 @@ public class LexAn implements AutoCloseable {
return; return;
case '/': case '/':
buffToken = new Token(currentLocation(), Token.Symbol.DIV, null);
nextChar(); nextChar();
if (buffChar != '/') {
buffToken = new Token(currentLocation(), Token.Symbol.DIV, null);
return;
}
while (buffChar != '\n') {
nextChar();
}
nextToken();
return; return;
case '%': case '%':
@ -266,15 +274,19 @@ public class LexAn implements AutoCloseable {
return; return;
} }
if (buffChar >= '0' && buffChar <= '9') { if (isNumeric()) {
intConst(); intConst();
return; return;
} }
if (isAlpha()) {
identifier();
return;
}
throw new Report.Error(currentLocation(), "Unrecognized character '" + (char) buffChar + "'."); throw new Report.Error(currentLocation(), "Unrecognized character '" + (char) buffChar + "'.");
} }
private boolean isDigit() { private boolean isNumeric() {
return buffChar >= '0' && buffChar <= '9'; return buffChar >= '0' && buffChar <= '9';
} }
@ -286,6 +298,30 @@ public class LexAn implements AutoCloseable {
return buffChar >= '0' && buffChar <= '9' || buffChar >= 'a' && buffChar <= 'f'; return buffChar >= '0' && buffChar <= '9' || buffChar >= 'a' && buffChar <= 'f';
} }
/**
 * Tells whether the currently buffered character can appear as a letter in a name.
 *
 * @return {@code true} if {@code buffChar} is an ASCII letter ({@code a-z},
 *         {@code A-Z}) or an underscore, {@code false} otherwise.
 */
private boolean isAlpha() {
    final int c = buffChar;
    if (c == '_') {
        return true;
    }
    final boolean lowerCase = 'a' <= c && c <= 'z';
    final boolean upperCase = 'A' <= c && c <= 'Z';
    return lowerCase || upperCase;
}
/**
 * Tells whether the currently buffered character is a letter, a digit or an underscore.
 *
 * @return {@code true} if {@code buffChar} is in {@code a-z}, {@code A-Z},
 *         {@code 0-9} or equals {@code '_'}, {@code false} otherwise.
 */
private boolean isAlphaNumeric() {
    final int c = buffChar;
    if (c == '_') {
        return true;
    }
    if ('0' <= c && c <= '9') {
        return true;
    }
    if ('a' <= c && c <= 'z') {
        return true;
    }
    return 'A' <= c && c <= 'Z';
}
/**
 * Looks up the keyword symbol that corresponds to the given word.
 *
 * @param word the lexeme to classify.
 * @return the matching keyword symbol, or {@code null} when {@code word} is
 *         not one of the reserved words listed below.
 */
private Token.Symbol getReservedWordSymbol(String word) {
    switch (word) {
        case "fun":
            return Token.Symbol.FUN;
        case "var":
            return Token.Symbol.VAR;
        case "if":
            return Token.Symbol.IF;
        case "then":
            return Token.Symbol.THEN;
        case "else":
            return Token.Symbol.ELSE;
        case "while":
            return Token.Symbol.WHILE;
        case "do":
            return Token.Symbol.DO;
        case "let":
            return Token.Symbol.LET;
        case "in":
            return Token.Symbol.IN;
        case "end":
            return Token.Symbol.END;
        default:
            // Not a keyword; the caller decides what the word is instead.
            return null;
    }
}
private void intConst() { private void intConst() {
Report.Location startLocation = currentLocation(); Report.Location startLocation = currentLocation();
Report.Location endLocation = currentLocation(); Report.Location endLocation = currentLocation();
@ -294,11 +330,11 @@ public class LexAn implements AutoCloseable {
if (buffChar == '0') { if (buffChar == '0') {
lexeme.append((char) buffChar); lexeme.append((char) buffChar);
nextChar(); nextChar();
if (isDigit()) { if (isNumeric()) {
throw new Report.Error(startLocation, "Leading zero is not allowed."); throw new Report.Error(startLocation, "Leading zero is not allowed.");
} }
} else { } else {
while (isDigit()) { while (isNumeric()) {
lexeme.append((char) buffChar); lexeme.append((char) buffChar);
endLocation = currentLocation(); endLocation = currentLocation();
nextChar(); nextChar();
@ -384,6 +420,29 @@ public class LexAn implements AutoCloseable {
nextChar(); nextChar();
} }
/**
 * Scans a name or keyword starting at the currently buffered character and
 * stores the resulting token in {@code buffToken}.
 *
 * Characters are consumed for as long as {@link #isAlphaNumeric()} accepts
 * them. The collected text becomes the token's lexeme; its symbol is the
 * keyword symbol reported by {@code getReservedWordSymbol}, or
 * {@code IDENTIFIER} when that lookup yields {@code null}. The token's
 * location is built from the location at entry and the location of the last
 * character that was appended.
 */
private void identifier() {
    final Report.Location first = currentLocation();
    Report.Location last = currentLocation();

    final StringBuilder text = new StringBuilder();
    for (; isAlphaNumeric(); nextChar()) {
        text.append((char) buffChar);
        // Remember where the most recently accepted character sits.
        last = currentLocation();
    }

    final String word = text.toString();
    final Token.Symbol keyword = getReservedWordSymbol(word);
    final Token.Symbol kind = (keyword != null) ? keyword : Token.Symbol.IDENTIFIER;

    buffToken = new Token(new Report.Location(first, last), kind, word);
}
/** /**
* Vrne trenutni leksikalni simbol, ki ostane v lastnistvu leksikalnega * Vrne trenutni leksikalni simbol, ki ostane v lastnistvu leksikalnega
* analizatorja. * analizatorja.