LexAn implementation done
This commit is contained in:
parent
42fe2bf270
commit
56facf7372
5
.idea/codeStyles/codeStyleConfig.xml
generated
Normal file
5
.idea/codeStyles/codeStyleConfig.xml
generated
Normal file
@ -0,0 +1,5 @@
|
||||
<component name="ProjectCodeStyleConfiguration">
|
||||
<state>
|
||||
<option name="PREFERRED_PROJECT_CODE_STYLE" value="Default" />
|
||||
</state>
|
||||
</component>
|
@ -1 +1,2 @@
|
||||
= , && || ! == != > < >= <= + - * / % ^ ( )
|
||||
var test = "1234" // hello world
|
||||
var something = 33
|
@ -1,5 +1,7 @@
|
||||
package pins25.common;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
|
||||
* Leksikalni simbol.
|
||||
*
|
||||
@ -9,90 +11,158 @@ package pins25.common;
|
||||
*/
|
||||
public record Token(Report.Location location, Symbol symbol, String lexeme) implements Report.Locatable {
|
||||
|
||||
/**
|
||||
* Vrste leksikalnih simbolov.
|
||||
*/
|
||||
public enum Symbol {
|
||||
/** Konec datoteke. */
|
||||
EOF,
|
||||
/** Stevilo. */
|
||||
INTCONST,
|
||||
/** Znak. */
|
||||
CHARCONST,
|
||||
/** Niz znakov. */
|
||||
STRINGCONST,
|
||||
/** Ime. */
|
||||
IDENTIFIER,
|
||||
/** Kljucna beseda {@code fun}. */
|
||||
FUN,
|
||||
/** Kljucna beseda {@code var}. */
|
||||
VAR,
|
||||
/** Kljucna beseda {@code if}. */
|
||||
IF,
|
||||
/** Kljucna beseda {@code then}. */
|
||||
THEN,
|
||||
/** Kljucna beseda {@code else}. */
|
||||
ELSE,
|
||||
/** Kljucna beseda {@code while}. */
|
||||
WHILE,
|
||||
/** Kljucna beseda {@code do}. */
|
||||
DO,
|
||||
/** Kljucna beseda {@code let}. */
|
||||
LET,
|
||||
/** Kljucna beseda {@code in}. */
|
||||
IN,
|
||||
/** Kljucna beseda {@code end}. */
|
||||
END,
|
||||
/** Simbol {@code =}. */
|
||||
ASSIGN,
|
||||
/** Simbol {@code ,}. */
|
||||
COMMA,
|
||||
/** Simbol {@code &&}. */
|
||||
AND,
|
||||
/** Simbol {@code ||}. */
|
||||
OR,
|
||||
/** Simbol {@code !}. */
|
||||
NOT,
|
||||
/** Simbol {@code ==}. */
|
||||
EQU,
|
||||
/** Simbol {@code !=}. */
|
||||
NEQ,
|
||||
/** Simbol {@code >}. */
|
||||
GTH,
|
||||
/** Simbol {@code <}. */
|
||||
LTH,
|
||||
/** Simbol {@code >=}. */
|
||||
GEQ,
|
||||
/** Simbol {@code <=}. */
|
||||
LEQ,
|
||||
/** Simbol {@code +}. */
|
||||
ADD,
|
||||
/** Simbol {@code -}. */
|
||||
SUB,
|
||||
/** Simbol {@code *}. */
|
||||
MUL,
|
||||
/** Simbol {@code /}. */
|
||||
DIV,
|
||||
/** Simbol {@code %}. */
|
||||
MOD,
|
||||
/** Simbol {@code ^}. */
|
||||
PTR,
|
||||
/** Simbol {@code (}. */
|
||||
LPAREN,
|
||||
/** Simbol {@code )}. */
|
||||
RPAREN,
|
||||
}
|
||||
/**
|
||||
* Vrste leksikalnih simbolov.
|
||||
*/
|
||||
public enum Symbol {
|
||||
/**
|
||||
* Konec datoteke.
|
||||
*/
|
||||
EOF,
|
||||
/**
|
||||
* Stevilo.
|
||||
*/
|
||||
INTCONST,
|
||||
/**
|
||||
* Znak.
|
||||
*/
|
||||
CHARCONST,
|
||||
/**
|
||||
* Niz znakov.
|
||||
*/
|
||||
STRINGCONST,
|
||||
/**
|
||||
* Ime.
|
||||
*/
|
||||
IDENTIFIER,
|
||||
/**
|
||||
* Kljucna beseda {@code fun}.
|
||||
*/
|
||||
FUN,
|
||||
/**
|
||||
* Kljucna beseda {@code var}.
|
||||
*/
|
||||
VAR,
|
||||
/**
|
||||
* Kljucna beseda {@code if}.
|
||||
*/
|
||||
IF,
|
||||
/**
|
||||
* Kljucna beseda {@code then}.
|
||||
*/
|
||||
THEN,
|
||||
/**
|
||||
* Kljucna beseda {@code else}.
|
||||
*/
|
||||
ELSE,
|
||||
/**
|
||||
* Kljucna beseda {@code while}.
|
||||
*/
|
||||
WHILE,
|
||||
/**
|
||||
* Kljucna beseda {@code do}.
|
||||
*/
|
||||
DO,
|
||||
/**
|
||||
* Kljucna beseda {@code let}.
|
||||
*/
|
||||
LET,
|
||||
/**
|
||||
* Kljucna beseda {@code in}.
|
||||
*/
|
||||
IN,
|
||||
/**
|
||||
* Kljucna beseda {@code end}.
|
||||
*/
|
||||
END,
|
||||
/**
|
||||
* Simbol {@code =}.
|
||||
*/
|
||||
ASSIGN,
|
||||
/**
|
||||
* Simbol {@code ,}.
|
||||
*/
|
||||
COMMA,
|
||||
/**
|
||||
* Simbol {@code &&}.
|
||||
*/
|
||||
AND,
|
||||
/**
|
||||
* Simbol {@code ||}.
|
||||
*/
|
||||
OR,
|
||||
/**
|
||||
* Simbol {@code !}.
|
||||
*/
|
||||
NOT,
|
||||
/**
|
||||
* Simbol {@code ==}.
|
||||
*/
|
||||
EQU,
|
||||
/**
|
||||
* Simbol {@code !=}.
|
||||
*/
|
||||
NEQ,
|
||||
/**
|
||||
* Simbol {@code >}.
|
||||
*/
|
||||
GTH,
|
||||
/**
|
||||
* Simbol {@code <}.
|
||||
*/
|
||||
LTH,
|
||||
/**
|
||||
* Simbol {@code >=}.
|
||||
*/
|
||||
GEQ,
|
||||
/**
|
||||
* Simbol {@code <=}.
|
||||
*/
|
||||
LEQ,
|
||||
/**
|
||||
* Simbol {@code +}.
|
||||
*/
|
||||
ADD,
|
||||
/**
|
||||
* Simbol {@code -}.
|
||||
*/
|
||||
SUB,
|
||||
/**
|
||||
* Simbol {@code *}.
|
||||
*/
|
||||
MUL,
|
||||
/**
|
||||
* Simbol {@code /}.
|
||||
*/
|
||||
DIV,
|
||||
/**
|
||||
* Simbol {@code %}.
|
||||
*/
|
||||
MOD,
|
||||
/**
|
||||
* Simbol {@code ^}.
|
||||
*/
|
||||
PTR,
|
||||
/**
|
||||
* Simbol {@code (}.
|
||||
*/
|
||||
LPAREN,
|
||||
/**
|
||||
* Simbol {@code )}.
|
||||
*/
|
||||
RPAREN,
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
String lexeme = switch (symbol) {
|
||||
case INTCONST -> "(" + this.lexeme + ")";
|
||||
case CHARCONST -> "(" + this.lexeme + ")";
|
||||
case STRINGCONST -> "(" + this.lexeme + ")";
|
||||
case IDENTIFIER -> "(" + this.lexeme + ")";
|
||||
default -> "";
|
||||
};
|
||||
return location + " " + symbol + lexeme;
|
||||
}
|
||||
@Override
|
||||
public String toString() {
|
||||
String lexeme = switch (symbol) {
|
||||
case INTCONST -> "(" + this.lexeme + ")";
|
||||
case CHARCONST -> "(" + this.lexeme + ")";
|
||||
case STRINGCONST -> "(" + this.lexeme + ")";
|
||||
case IDENTIFIER -> "(" + this.lexeme + ")";
|
||||
default -> "";
|
||||
};
|
||||
return location + " " + symbol + lexeme;
|
||||
}
|
||||
|
||||
}
|
@ -92,7 +92,7 @@ public class LexAn implements AutoCloseable {
|
||||
return;
|
||||
case '\t': // Prejsnji znak je tabulator, ta znak je morda potisnjen v desno.
|
||||
buffChar = srcFile.read();
|
||||
while (buffCharColumn % 8 != 0)
|
||||
while (buffCharColumn % 4 != 0)
|
||||
buffCharColumn += 1;
|
||||
buffCharColumn += 1;
|
||||
return;
|
||||
@ -130,7 +130,7 @@ public class LexAn implements AutoCloseable {
|
||||
Report.Location start = currentLocation();
|
||||
switch (buffChar) {
|
||||
case -1: // EOF
|
||||
buffToken = new Token(currentLocation(), Token.Symbol.EOF, null);
|
||||
buffToken = new Token(new Report.Location(0, 0), Token.Symbol.EOF, null);
|
||||
return;
|
||||
|
||||
case '\'':
|
||||
@ -154,7 +154,6 @@ public class LexAn implements AutoCloseable {
|
||||
}
|
||||
|
||||
buffToken = new Token(start, Token.Symbol.ASSIGN, null);
|
||||
nextChar();
|
||||
return;
|
||||
|
||||
case ',':
|
||||
@ -241,8 +240,17 @@ public class LexAn implements AutoCloseable {
|
||||
return;
|
||||
|
||||
case '/':
|
||||
buffToken = new Token(currentLocation(), Token.Symbol.DIV, null);
|
||||
nextChar();
|
||||
if (buffChar != '/') {
|
||||
buffToken = new Token(currentLocation(), Token.Symbol.DIV, null);
|
||||
return;
|
||||
}
|
||||
|
||||
while (buffChar != '\n') {
|
||||
nextChar();
|
||||
}
|
||||
|
||||
nextToken();
|
||||
return;
|
||||
|
||||
case '%':
|
||||
@ -266,15 +274,19 @@ public class LexAn implements AutoCloseable {
|
||||
return;
|
||||
}
|
||||
|
||||
if (buffChar >= '0' && buffChar <= '9') {
|
||||
if (isNumeric()) {
|
||||
intConst();
|
||||
return;
|
||||
}
|
||||
if (isAlpha()) {
|
||||
identifier();
|
||||
return;
|
||||
}
|
||||
|
||||
throw new Report.Error(currentLocation(), "Unrecognized character '" + (char) buffChar + "'.");
|
||||
}
|
||||
|
||||
private boolean isDigit() {
|
||||
private boolean isNumeric() {
|
||||
return buffChar >= '0' && buffChar <= '9';
|
||||
}
|
||||
|
||||
@ -286,6 +298,30 @@ public class LexAn implements AutoCloseable {
|
||||
return buffChar >= '0' && buffChar <= '9' || buffChar >= 'a' && buffChar <= 'f';
|
||||
}
|
||||
|
||||
private boolean isAlpha() {
|
||||
return buffChar >= 'a' && buffChar <= 'z' || buffChar >= 'A' && buffChar <= 'Z' || buffChar == '_';
|
||||
}
|
||||
|
||||
private boolean isAlphaNumeric() {
|
||||
return buffChar >= 'a' && buffChar <= 'z' || buffChar >= 'A' && buffChar <= 'Z' || buffChar >= '0' && buffChar <= '9' || buffChar == '_';
|
||||
}
|
||||
|
||||
private Token.Symbol getReservedWordSymbol(String word) {
|
||||
return switch (word) {
|
||||
case "fun" -> Token.Symbol.FUN;
|
||||
case "var" -> Token.Symbol.VAR;
|
||||
case "if" -> Token.Symbol.IF;
|
||||
case "then" -> Token.Symbol.THEN;
|
||||
case "else" -> Token.Symbol.ELSE;
|
||||
case "while" -> Token.Symbol.WHILE;
|
||||
case "do" -> Token.Symbol.DO;
|
||||
case "let" -> Token.Symbol.LET;
|
||||
case "in" -> Token.Symbol.IN;
|
||||
case "end" -> Token.Symbol.END;
|
||||
default -> null;
|
||||
};
|
||||
}
|
||||
|
||||
private void intConst() {
|
||||
Report.Location startLocation = currentLocation();
|
||||
Report.Location endLocation = currentLocation();
|
||||
@ -294,11 +330,11 @@ public class LexAn implements AutoCloseable {
|
||||
if (buffChar == '0') {
|
||||
lexeme.append((char) buffChar);
|
||||
nextChar();
|
||||
if (isDigit()) {
|
||||
if (isNumeric()) {
|
||||
throw new Report.Error(startLocation, "Leading zero is not allowed.");
|
||||
}
|
||||
} else {
|
||||
while (isDigit()) {
|
||||
while (isNumeric()) {
|
||||
lexeme.append((char) buffChar);
|
||||
endLocation = currentLocation();
|
||||
nextChar();
|
||||
@ -384,6 +420,29 @@ public class LexAn implements AutoCloseable {
|
||||
nextChar();
|
||||
}
|
||||
|
||||
private void identifier() {
|
||||
Report.Location startLocation = currentLocation();
|
||||
Report.Location endLocation = currentLocation();
|
||||
StringBuilder lexeme = new StringBuilder();
|
||||
|
||||
while (isAlphaNumeric()) {
|
||||
lexeme.append((char) buffChar);
|
||||
endLocation = currentLocation();
|
||||
nextChar();
|
||||
}
|
||||
|
||||
Token.Symbol symbol = getReservedWordSymbol(lexeme.toString());
|
||||
if (symbol == null) {
|
||||
symbol = Token.Symbol.IDENTIFIER;
|
||||
}
|
||||
|
||||
buffToken = new Token(
|
||||
new Report.Location(startLocation, endLocation),
|
||||
symbol,
|
||||
lexeme.toString()
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Vrne trenutni leksikalni simbol, ki ostane v lastnistvu leksikalnega
|
||||
* analizatorja.
|
||||
|
Loading…
x
Reference in New Issue
Block a user