LexAn implementation done

This commit is contained in:
Gašper Dobrovoljc 2025-03-05 09:44:48 +01:00
parent 42fe2bf270
commit 56facf7372
No known key found for this signature in database
GPG Key ID: 0E7E037018CFA5A5
4 changed files with 229 additions and 94 deletions

5
.idea/codeStyles/codeStyleConfig.xml generated Normal file
View File

@ -0,0 +1,5 @@
<component name="ProjectCodeStyleConfiguration">
<state>
<option name="PREFERRED_PROJECT_CODE_STYLE" value="Default" />
</state>
</component>

View File

@ -1 +1,2 @@
= , && || ! == != > < >= <= + - * / % ^ ( )
var test = "1234" // hello world
var something = 33

View File

@ -1,5 +1,7 @@
package pins25.common;
import java.util.HashMap;
/**
* Leksikalni simbol.
*
@ -13,73 +15,141 @@ public record Token(Report.Location location, Symbol symbol, String lexeme) impl
* Vrste leksikalnih simbolov.
*/
public enum Symbol {
/**
* End of file.
*/
EOF,
/**
* Number.
*/
INTCONST,
/**
* Character.
*/
CHARCONST,
/**
* String of characters.
*/
STRINGCONST,
/**
* Name.
*/
IDENTIFIER,
/**
* Keyword {@code fun}.
*/
FUN,
/**
* Keyword {@code var}.
*/
VAR,
/**
* Keyword {@code if}.
*/
IF,
/**
* Keyword {@code then}.
*/
THEN,
/**
* Keyword {@code else}.
*/
ELSE,
/**
* Keyword {@code while}.
*/
WHILE,
/**
* Keyword {@code do}.
*/
DO,
/**
* Keyword {@code let}.
*/
LET,
/**
* Keyword {@code in}.
*/
IN,
/**
* Keyword {@code end}.
*/
END,
/**
* Symbol {@code =}.
*/
ASSIGN,
/**
* Symbol {@code ,}.
*/
COMMA,
/**
* Symbol {@code &&}.
*/
AND,
/**
* Symbol {@code ||}.
*/
OR,
/**
* Symbol {@code !}.
*/
NOT,
/**
* Symbol {@code ==}.
*/
EQU,
/**
* Symbol {@code !=}.
*/
NEQ,
/**
* Symbol {@code >}.
*/
GTH,
/**
* Symbol {@code <}.
*/
LTH,
/**
* Symbol {@code >=}.
*/
GEQ,
/**
* Symbol {@code <=}.
*/
LEQ,
/**
* Symbol {@code +}.
*/
ADD,
/**
* Symbol {@code -}.
*/
SUB,
/**
* Symbol {@code *}.
*/
MUL,
/**
* Symbol {@code /}.
*/
DIV,
/**
* Symbol {@code %}.
*/
MOD,
/**
* Symbol {@code ^}.
*/
PTR,
/**
* Symbol {@code (}.
*/
LPAREN,
/**
* Symbol {@code )}.
*/
RPAREN,
}

View File

@ -92,7 +92,7 @@ public class LexAn implements AutoCloseable {
return;
case '\t': // Prejsnji znak je tabulator, ta znak je morda potisnjen v desno.
buffChar = srcFile.read();
while (buffCharColumn % 8 != 0)
while (buffCharColumn % 4 != 0)
buffCharColumn += 1;
buffCharColumn += 1;
return;
@ -130,7 +130,7 @@ public class LexAn implements AutoCloseable {
Report.Location start = currentLocation();
switch (buffChar) {
case -1: // EOF
buffToken = new Token(currentLocation(), Token.Symbol.EOF, null);
buffToken = new Token(new Report.Location(0, 0), Token.Symbol.EOF, null);
return;
case '\'':
@ -154,7 +154,6 @@ public class LexAn implements AutoCloseable {
}
buffToken = new Token(start, Token.Symbol.ASSIGN, null);
nextChar();
return;
case ',':
@ -241,8 +240,17 @@ public class LexAn implements AutoCloseable {
return;
case '/':
buffToken = new Token(currentLocation(), Token.Symbol.DIV, null);
nextChar();
if (buffChar != '/') {
buffToken = new Token(currentLocation(), Token.Symbol.DIV, null);
return;
}
while (buffChar != '\n') {
nextChar();
}
nextToken();
return;
case '%':
@ -266,15 +274,19 @@ public class LexAn implements AutoCloseable {
return;
}
if (buffChar >= '0' && buffChar <= '9') {
if (isNumeric()) {
intConst();
return;
}
if (isAlpha()) {
identifier();
return;
}
throw new Report.Error(currentLocation(), "Unrecognized character '" + (char) buffChar + "'.");
}
private boolean isDigit() {
private boolean isNumeric() {
return buffChar >= '0' && buffChar <= '9';
}
@ -286,6 +298,30 @@ public class LexAn implements AutoCloseable {
return buffChar >= '0' && buffChar <= '9' || buffChar >= 'a' && buffChar <= 'f';
}
/**
 * Tells whether the buffered character can start an identifier.
 *
 * @return {@code true} if {@code buffChar} is an ASCII letter
 *         ({@code a}-{@code z}, {@code A}-{@code Z}) or an underscore.
 */
private boolean isAlpha() {
    if (buffChar == '_') {
        return true;
    }
    final boolean lowerCase = 'a' <= buffChar && buffChar <= 'z';
    final boolean upperCase = 'A' <= buffChar && buffChar <= 'Z';
    return lowerCase || upperCase;
}
/**
 * Tells whether the buffered character can continue an identifier.
 *
 * @return {@code true} if {@code buffChar} is an ASCII letter, an ASCII
 *         decimal digit or an underscore.
 */
private boolean isAlphaNumeric() {
    if (buffChar == '_') {
        return true;
    }
    if ('0' <= buffChar && buffChar <= '9') {
        return true;
    }
    if ('a' <= buffChar && buffChar <= 'z') {
        return true;
    }
    return 'A' <= buffChar && buffChar <= 'Z';
}
/**
 * Maps a word to the symbol of the matching reserved word.
 *
 * @param word the word to look up (matched case-sensitively).
 * @return the keyword symbol for {@code word}, or {@code null} if
 *         {@code word} is not one of the reserved words listed below.
 */
private Token.Symbol getReservedWordSymbol(String word) {
    final Token.Symbol keyword;
    switch (word) {
        case "fun" -> keyword = Token.Symbol.FUN;
        case "var" -> keyword = Token.Symbol.VAR;
        case "if" -> keyword = Token.Symbol.IF;
        case "then" -> keyword = Token.Symbol.THEN;
        case "else" -> keyword = Token.Symbol.ELSE;
        case "while" -> keyword = Token.Symbol.WHILE;
        case "do" -> keyword = Token.Symbol.DO;
        case "let" -> keyword = Token.Symbol.LET;
        case "in" -> keyword = Token.Symbol.IN;
        case "end" -> keyword = Token.Symbol.END;
        // Anything else is not a reserved word; the caller decides what it is.
        default -> keyword = null;
    }
    return keyword;
}
private void intConst() {
Report.Location startLocation = currentLocation();
Report.Location endLocation = currentLocation();
@ -294,11 +330,11 @@ public class LexAn implements AutoCloseable {
if (buffChar == '0') {
lexeme.append((char) buffChar);
nextChar();
if (isDigit()) {
if (isNumeric()) {
throw new Report.Error(startLocation, "Leading zero is not allowed.");
}
} else {
while (isDigit()) {
while (isNumeric()) {
lexeme.append((char) buffChar);
endLocation = currentLocation();
nextChar();
@ -384,6 +420,29 @@ public class LexAn implements AutoCloseable {
nextChar();
}
/**
 * Reads a run of identifier characters and stores the resulting token in
 * {@code buffToken}.
 *
 * Characters are consumed for as long as {@link #isAlphaNumeric()} holds.
 * The collected text becomes a keyword token if
 * {@code getReservedWordSymbol} recognises it, otherwise an
 * {@code IDENTIFIER} token. The token's location spans from the first
 * consumed character to the last one.
 */
private void identifier() {
    final Report.Location first = currentLocation();
    Report.Location last = currentLocation();
    final StringBuilder text = new StringBuilder();

    for (; isAlphaNumeric(); nextChar()) {
        text.append((char) buffChar);
        // Remember where the most recently accepted character sits,
        // before the read position moves past it.
        last = currentLocation();
    }

    final String word = text.toString();
    final Token.Symbol reserved = getReservedWordSymbol(word);
    final Token.Symbol kind = (reserved != null) ? reserved : Token.Symbol.IDENTIFIER;

    buffToken = new Token(new Report.Location(first, last), kind, word);
}
/**
* Vrne trenutni leksikalni simbol, ki ostane v lastnistvu leksikalnega
* analizatorja.