LexAn implementation done

This commit is contained in:
Gašper Dobrovoljc 2025-03-05 09:44:48 +01:00
parent 42fe2bf270
commit 56facf7372
No known key found for this signature in database
GPG Key ID: 0E7E037018CFA5A5
4 changed files with 229 additions and 94 deletions

5
.idea/codeStyles/codeStyleConfig.xml generated Normal file
View File

@ -0,0 +1,5 @@
<component name="ProjectCodeStyleConfiguration">
<state>
<option name="PREFERRED_PROJECT_CODE_STYLE" value="Default" />
</state>
</component>

View File

@ -1 +1,2 @@
= , && || ! == != > < >= <= + - * / % ^ ( ) var test = "1234" // hello world
var something = 33

View File

@ -1,5 +1,7 @@
package pins25.common; package pins25.common;
import java.util.HashMap;
/** /**
* Leksikalni simbol. * Leksikalni simbol.
* *
@ -9,90 +11,158 @@ package pins25.common;
*/ */
public record Token(Report.Location location, Symbol symbol, String lexeme) implements Report.Locatable { public record Token(Report.Location location, Symbol symbol, String lexeme) implements Report.Locatable {
/** /**
* Vrste leksikalnih simbolov. * Vrste leksikalnih simbolov.
*/ */
public enum Symbol { public enum Symbol {
/** Konec datoteke. */ /**
EOF, * Konec datoteke.
/** Stevilo. */ */
INTCONST, EOF,
/** Znak. */ /**
CHARCONST, * Stevilo.
/** Niz znakov. */ */
STRINGCONST, INTCONST,
/** Ime. */ /**
IDENTIFIER, * Znak.
/** Kljucna beseda {@code fun}. */ */
FUN, CHARCONST,
/** Kljucna beseda {@code var}. */ /**
VAR, * Niz znakov.
/** Kljucna beseda {@code if}. */ */
IF, STRINGCONST,
/** Kljucna beseda {@code then}. */ /**
THEN, * Ime.
/** Kljucna beseda {@code else}. */ */
ELSE, IDENTIFIER,
/** Kljucna beseda {@code while}. */ /**
WHILE, * Kljucna beseda {@code fun}.
/** Kljucna beseda {@code do}. */ */
DO, FUN,
/** Kljucna beseda {@code let}. */ /**
LET, * Kljucna beseda {@code var}.
/** Kljucna beseda {@code in}. */ */
IN, VAR,
/** Kljucna beseda {@code end}. */ /**
END, * Kljucna beseda {@code if}.
/** Simbol {@code =}. */ */
ASSIGN, IF,
/** Simbol {@code ,}. */ /**
COMMA, * Kljucna beseda {@code then}.
/** Simbol {@code &&}. */ */
AND, THEN,
/** Simbol {@code ||}. */ /**
OR, * Kljucna beseda {@code else}.
/** Simbol {@code !}. */ */
NOT, ELSE,
/** Simbol {@code ==}. */ /**
EQU, * Kljucna beseda {@code while}.
/** Simbol {@code !=}. */ */
NEQ, WHILE,
/** Simbol {@code >}. */ /**
GTH, * Kljucna beseda {@code do}.
/** Simbol {@code <}. */ */
LTH, DO,
/** Simbol {@code >=}. */ /**
GEQ, * Kljucna beseda {@code let}.
/** Simbol {@code <=}. */ */
LEQ, LET,
/** Simbol {@code +}. */ /**
ADD, * Kljucna beseda {@code in}.
/** Simbol {@code -}. */ */
SUB, IN,
/** Simbol {@code *}. */ /**
MUL, * Kljucna beseda {@code end}.
/** Simbol {@code /}. */ */
DIV, END,
/** Simbol {@code %}. */ /**
MOD, * Simbol {@code =}.
/** Simbol {@code ^}. */ */
PTR, ASSIGN,
/** Simbol {@code (}. */ /**
LPAREN, * Simbol {@code ,}.
/** Simbol {@code )}. */ */
RPAREN, COMMA,
} /**
* Simbol {@code &&}.
*/
AND,
/**
* Simbol {@code ||}.
*/
OR,
/**
* Simbol {@code !}.
*/
NOT,
/**
* Simbol {@code ==}.
*/
EQU,
/**
* Simbol {@code !=}.
*/
NEQ,
/**
* Simbol {@code >}.
*/
GTH,
/**
* Simbol {@code <}.
*/
LTH,
/**
* Simbol {@code >=}.
*/
GEQ,
/**
* Simbol {@code <=}.
*/
LEQ,
/**
* Simbol {@code +}.
*/
ADD,
/**
* Simbol {@code -}.
*/
SUB,
/**
* Simbol {@code *}.
*/
MUL,
/**
* Simbol {@code /}.
*/
DIV,
/**
* Simbol {@code %}.
*/
MOD,
/**
* Simbol {@code ^}.
*/
PTR,
/**
* Simbol {@code (}.
*/
LPAREN,
/**
* Simbol {@code )}.
*/
RPAREN,
}
@Override @Override
public String toString() { public String toString() {
String lexeme = switch (symbol) { String lexeme = switch (symbol) {
case INTCONST -> "(" + this.lexeme + ")"; case INTCONST -> "(" + this.lexeme + ")";
case CHARCONST -> "(" + this.lexeme + ")"; case CHARCONST -> "(" + this.lexeme + ")";
case STRINGCONST -> "(" + this.lexeme + ")"; case STRINGCONST -> "(" + this.lexeme + ")";
case IDENTIFIER -> "(" + this.lexeme + ")"; case IDENTIFIER -> "(" + this.lexeme + ")";
default -> ""; default -> "";
}; };
return location + " " + symbol + lexeme; return location + " " + symbol + lexeme;
} }
} }

View File

@ -92,7 +92,7 @@ public class LexAn implements AutoCloseable {
return; return;
case '\t': // Prejsnji znak je tabulator, ta znak je morda potisnjen v desno. case '\t': // Prejsnji znak je tabulator, ta znak je morda potisnjen v desno.
buffChar = srcFile.read(); buffChar = srcFile.read();
while (buffCharColumn % 8 != 0) while (buffCharColumn % 4 != 0)
buffCharColumn += 1; buffCharColumn += 1;
buffCharColumn += 1; buffCharColumn += 1;
return; return;
@ -130,7 +130,7 @@ public class LexAn implements AutoCloseable {
Report.Location start = currentLocation(); Report.Location start = currentLocation();
switch (buffChar) { switch (buffChar) {
case -1: // EOF case -1: // EOF
buffToken = new Token(currentLocation(), Token.Symbol.EOF, null); buffToken = new Token(new Report.Location(0, 0), Token.Symbol.EOF, null);
return; return;
case '\'': case '\'':
@ -154,7 +154,6 @@ public class LexAn implements AutoCloseable {
} }
buffToken = new Token(start, Token.Symbol.ASSIGN, null); buffToken = new Token(start, Token.Symbol.ASSIGN, null);
nextChar();
return; return;
case ',': case ',':
@ -241,8 +240,17 @@ public class LexAn implements AutoCloseable {
return; return;
case '/': case '/':
buffToken = new Token(currentLocation(), Token.Symbol.DIV, null);
nextChar(); nextChar();
if (buffChar != '/') {
buffToken = new Token(currentLocation(), Token.Symbol.DIV, null);
return;
}
while (buffChar != '\n') {
nextChar();
}
nextToken();
return; return;
case '%': case '%':
@ -266,15 +274,19 @@ public class LexAn implements AutoCloseable {
return; return;
} }
if (buffChar >= '0' && buffChar <= '9') { if (isNumeric()) {
intConst(); intConst();
return; return;
} }
if (isAlpha()) {
identifier();
return;
}
throw new Report.Error(currentLocation(), "Unrecognized character '" + (char) buffChar + "'."); throw new Report.Error(currentLocation(), "Unrecognized character '" + (char) buffChar + "'.");
} }
private boolean isDigit() { private boolean isNumeric() {
return buffChar >= '0' && buffChar <= '9'; return buffChar >= '0' && buffChar <= '9';
} }
@ -286,6 +298,30 @@ public class LexAn implements AutoCloseable {
return buffChar >= '0' && buffChar <= '9' || buffChar >= 'a' && buffChar <= 'f'; return buffChar >= '0' && buffChar <= '9' || buffChar >= 'a' && buffChar <= 'f';
} }
/**
 * Checks whether the buffered character is an ASCII letter or an underscore.
 *
 * @return {@code true} if {@code buffChar} is in {@code a-z}, in {@code A-Z},
 *         or equal to {@code '_'}; {@code false} otherwise.
 */
private boolean isAlpha() {
    if (buffChar == '_') {
        return true;
    }
    final boolean lowerCase = 'a' <= buffChar && buffChar <= 'z';
    final boolean upperCase = 'A' <= buffChar && buffChar <= 'Z';
    return lowerCase || upperCase;
}
/**
 * Checks whether the buffered character is an ASCII letter, a decimal digit,
 * or an underscore.
 *
 * @return {@code true} if {@code buffChar} is in {@code a-z}, {@code A-Z},
 *         {@code 0-9}, or equal to {@code '_'}; {@code false} otherwise.
 */
private boolean isAlphaNumeric() {
    if (buffChar == '_') {
        return true;
    }
    if ('0' <= buffChar && buffChar <= '9') {
        return true;
    }
    if ('a' <= buffChar && buffChar <= 'z') {
        return true;
    }
    return 'A' <= buffChar && buffChar <= 'Z';
}
/**
 * Maps a word to the token symbol of the reserved word it spells.
 *
 * @param word the scanned word; the match is case-sensitive.
 * @return the matching keyword symbol, or {@code null} when {@code word}
 *         is not one of the reserved words listed below.
 */
private Token.Symbol getReservedWordSymbol(String word) {
    switch (word) {
        case "fun":
            return Token.Symbol.FUN;
        case "var":
            return Token.Symbol.VAR;
        case "if":
            return Token.Symbol.IF;
        case "then":
            return Token.Symbol.THEN;
        case "else":
            return Token.Symbol.ELSE;
        case "while":
            return Token.Symbol.WHILE;
        case "do":
            return Token.Symbol.DO;
        case "let":
            return Token.Symbol.LET;
        case "in":
            return Token.Symbol.IN;
        case "end":
            return Token.Symbol.END;
        default:
            // Not a keyword; the caller decides what an ordinary word becomes.
            return null;
    }
}
private void intConst() { private void intConst() {
Report.Location startLocation = currentLocation(); Report.Location startLocation = currentLocation();
Report.Location endLocation = currentLocation(); Report.Location endLocation = currentLocation();
@ -294,11 +330,11 @@ public class LexAn implements AutoCloseable {
if (buffChar == '0') { if (buffChar == '0') {
lexeme.append((char) buffChar); lexeme.append((char) buffChar);
nextChar(); nextChar();
if (isDigit()) { if (isNumeric()) {
throw new Report.Error(startLocation, "Leading zero is not allowed."); throw new Report.Error(startLocation, "Leading zero is not allowed.");
} }
} else { } else {
while (isDigit()) { while (isNumeric()) {
lexeme.append((char) buffChar); lexeme.append((char) buffChar);
endLocation = currentLocation(); endLocation = currentLocation();
nextChar(); nextChar();
@ -384,6 +420,29 @@ public class LexAn implements AutoCloseable {
nextChar(); nextChar();
} }
/**
 * Scans a word made of letters, digits and underscores, starting at the
 * buffered character, and stores the resulting token in {@code buffToken}.
 *
 * The token's symbol is the reserved-word symbol when the word is a keyword,
 * and {@code IDENTIFIER} otherwise. Its location runs from the first to the
 * last character of the word, and its lexeme is the word itself.
 */
private void identifier() {
    final Report.Location first = currentLocation();
    Report.Location last = currentLocation();
    final StringBuilder text = new StringBuilder();

    while (isAlphaNumeric()) {
        // Record the position of this character before moving past it.
        last = currentLocation();
        text.append((char) buffChar);
        nextChar();
    }

    final String word = text.toString();
    final Token.Symbol keyword = getReservedWordSymbol(word);
    final Token.Symbol kind = (keyword != null) ? keyword : Token.Symbol.IDENTIFIER;

    buffToken = new Token(new Report.Location(first, last), kind, word);
}
/** /**
* Vrne trenutni leksikalni simbol, ki ostane v lastnistvu leksikalnega * Vrne trenutni leksikalni simbol, ki ostane v lastnistvu leksikalnega
* analizatorja. * analizatorja.