LexAn implementation done
This commit is contained in:
		
							parent
							
								
									42fe2bf270
								
							
						
					
					
						commit
						56facf7372
					
				
							
								
								
									
										5
									
								
								.idea/codeStyles/codeStyleConfig.xml
									
									
									
										generated
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								.idea/codeStyles/codeStyleConfig.xml
									
									
									
										generated
									
									
									
										Normal file
									
								
							@ -0,0 +1,5 @@
 | 
			
		||||
<component name="ProjectCodeStyleConfiguration">
 | 
			
		||||
  <state>
 | 
			
		||||
    <option name="PREFERRED_PROJECT_CODE_STYLE" value="Default" />
 | 
			
		||||
  </state>
 | 
			
		||||
</component>
 | 
			
		||||
@ -1 +1,2 @@
 | 
			
		||||
= , && || ! == != > < >= <= + - * / % ^ ( )
 | 
			
		||||
var test = "1234" // hello world
 | 
			
		||||
var something = 33
 | 
			
		||||
@ -1,98 +1,168 @@
 | 
			
		||||
package pins25.common;
 | 
			
		||||
 | 
			
		||||
import java.util.HashMap;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Leksikalni simbol.
 | 
			
		||||
 * 
 | 
			
		||||
 *
 | 
			
		||||
 * @param location Lokacija simbola v izvornem programu.
 | 
			
		||||
 * @param symbol   Vrsta simbola.
 | 
			
		||||
 * @param lexeme   Znakovna predstavitev simbola.
 | 
			
		||||
 */
 | 
			
		||||
public record Token(Report.Location location, Symbol symbol, String lexeme) implements Report.Locatable {
 | 
			
		||||
 | 
			
		||||
	/**
 | 
			
		||||
	 * Vrste leksikalnih simbolov.
 | 
			
		||||
	 */
 | 
			
		||||
	public enum Symbol {
 | 
			
		||||
		/** Konec datoteke. */
 | 
			
		||||
		EOF,
 | 
			
		||||
		/** Stevilo. */
 | 
			
		||||
		INTCONST,
 | 
			
		||||
		/** Znak. */
 | 
			
		||||
		CHARCONST,
 | 
			
		||||
		/** Niz znakov. */
 | 
			
		||||
		STRINGCONST,
 | 
			
		||||
		/** Ime. */
 | 
			
		||||
		IDENTIFIER,
 | 
			
		||||
		/** Kljucna beseda {@code fun}. */
 | 
			
		||||
		FUN,
 | 
			
		||||
		/** Kljucna beseda {@code var}. */
 | 
			
		||||
		VAR,
 | 
			
		||||
		/** Kljucna beseda {@code if}. */
 | 
			
		||||
		IF,
 | 
			
		||||
		/** Kljucna beseda {@code then}. */
 | 
			
		||||
		THEN,
 | 
			
		||||
		/** Kljucna beseda {@code else}. */
 | 
			
		||||
		ELSE,
 | 
			
		||||
		/** Kljucna beseda {@code while}. */
 | 
			
		||||
		WHILE,
 | 
			
		||||
		/** Kljucna beseda {@code do}. */
 | 
			
		||||
		DO,
 | 
			
		||||
		/** Kljucna beseda {@code let}. */
 | 
			
		||||
		LET,
 | 
			
		||||
		/** Kljucna beseda {@code in}. */
 | 
			
		||||
		IN,
 | 
			
		||||
		/** Kljucna beseda {@code end}. */
 | 
			
		||||
		END,
 | 
			
		||||
		/** Simbol {@code =}. */
 | 
			
		||||
		ASSIGN,
 | 
			
		||||
		/** Simbol {@code ,}. */
 | 
			
		||||
		COMMA,
 | 
			
		||||
		/** Simbol {@code &&}. */
 | 
			
		||||
		AND,
 | 
			
		||||
		/** Simbol {@code ||}. */
 | 
			
		||||
		OR,
 | 
			
		||||
		/** Simbol {@code !}. */
 | 
			
		||||
		NOT,
 | 
			
		||||
		/** Simbol {@code ==}. */
 | 
			
		||||
		EQU,
 | 
			
		||||
		/** Simbol {@code !=}. */
 | 
			
		||||
		NEQ,
 | 
			
		||||
		/** Simbol {@code >}. */
 | 
			
		||||
		GTH,
 | 
			
		||||
		/** Simbol {@code <}. */
 | 
			
		||||
		LTH,
 | 
			
		||||
		/** Simbol {@code >=}. */
 | 
			
		||||
		GEQ,
 | 
			
		||||
		/** Simbol {@code <=}. */
 | 
			
		||||
		LEQ,
 | 
			
		||||
		/** Simbol {@code +}. */
 | 
			
		||||
		ADD,
 | 
			
		||||
		/** Simbol {@code -}. */
 | 
			
		||||
		SUB,
 | 
			
		||||
		/** Simbol {@code *}. */
 | 
			
		||||
		MUL,
 | 
			
		||||
		/** Simbol {@code /}. */
 | 
			
		||||
		DIV,
 | 
			
		||||
		/** Simbol {@code %}. */
 | 
			
		||||
		MOD,
 | 
			
		||||
		/** Simbol {@code ^}. */
 | 
			
		||||
		PTR,
 | 
			
		||||
		/** Simbol {@code (}. */
 | 
			
		||||
		LPAREN,
 | 
			
		||||
		/** Simbol {@code )}. */
 | 
			
		||||
		RPAREN,
 | 
			
		||||
	}
 | 
			
		||||
    /**
 | 
			
		||||
     * Vrste leksikalnih simbolov.
 | 
			
		||||
     */
 | 
			
		||||
    public enum Symbol {
 | 
			
		||||
        /**
 | 
			
		||||
         * Konec datoteke.
 | 
			
		||||
         */
 | 
			
		||||
        EOF,
 | 
			
		||||
        /**
 | 
			
		||||
         * Stevilo.
 | 
			
		||||
         */
 | 
			
		||||
        INTCONST,
 | 
			
		||||
        /**
 | 
			
		||||
         * Znak.
 | 
			
		||||
         */
 | 
			
		||||
        CHARCONST,
 | 
			
		||||
        /**
 | 
			
		||||
         * Niz znakov.
 | 
			
		||||
         */
 | 
			
		||||
        STRINGCONST,
 | 
			
		||||
        /**
 | 
			
		||||
         * Ime.
 | 
			
		||||
         */
 | 
			
		||||
        IDENTIFIER,
 | 
			
		||||
        /**
 | 
			
		||||
         * Kljucna beseda {@code fun}.
 | 
			
		||||
         */
 | 
			
		||||
        FUN,
 | 
			
		||||
        /**
 | 
			
		||||
         * Kljucna beseda {@code var}.
 | 
			
		||||
         */
 | 
			
		||||
        VAR,
 | 
			
		||||
        /**
 | 
			
		||||
         * Kljucna beseda {@code if}.
 | 
			
		||||
         */
 | 
			
		||||
        IF,
 | 
			
		||||
        /**
 | 
			
		||||
         * Kljucna beseda {@code then}.
 | 
			
		||||
         */
 | 
			
		||||
        THEN,
 | 
			
		||||
        /**
 | 
			
		||||
         * Kljucna beseda {@code else}.
 | 
			
		||||
         */
 | 
			
		||||
        ELSE,
 | 
			
		||||
        /**
 | 
			
		||||
         * Kljucna beseda {@code while}.
 | 
			
		||||
         */
 | 
			
		||||
        WHILE,
 | 
			
		||||
        /**
 | 
			
		||||
         * Kljucna beseda {@code do}.
 | 
			
		||||
         */
 | 
			
		||||
        DO,
 | 
			
		||||
        /**
 | 
			
		||||
         * Kljucna beseda {@code let}.
 | 
			
		||||
         */
 | 
			
		||||
        LET,
 | 
			
		||||
        /**
 | 
			
		||||
         * Kljucna beseda {@code in}.
 | 
			
		||||
         */
 | 
			
		||||
        IN,
 | 
			
		||||
        /**
 | 
			
		||||
         * Kljucna beseda {@code end}.
 | 
			
		||||
         */
 | 
			
		||||
        END,
 | 
			
		||||
        /**
 | 
			
		||||
         * Simbol {@code =}.
 | 
			
		||||
         */
 | 
			
		||||
        ASSIGN,
 | 
			
		||||
        /**
 | 
			
		||||
         * Simbol {@code ,}.
 | 
			
		||||
         */
 | 
			
		||||
        COMMA,
 | 
			
		||||
        /**
 | 
			
		||||
         * Simbol {@code &&}.
 | 
			
		||||
         */
 | 
			
		||||
        AND,
 | 
			
		||||
        /**
 | 
			
		||||
         * Simbol {@code ||}.
 | 
			
		||||
         */
 | 
			
		||||
        OR,
 | 
			
		||||
        /**
 | 
			
		||||
         * Simbol {@code !}.
 | 
			
		||||
         */
 | 
			
		||||
        NOT,
 | 
			
		||||
        /**
 | 
			
		||||
         * Simbol {@code ==}.
 | 
			
		||||
         */
 | 
			
		||||
        EQU,
 | 
			
		||||
        /**
 | 
			
		||||
         * Simbol {@code !=}.
 | 
			
		||||
         */
 | 
			
		||||
        NEQ,
 | 
			
		||||
        /**
 | 
			
		||||
         * Simbol {@code >}.
 | 
			
		||||
         */
 | 
			
		||||
        GTH,
 | 
			
		||||
        /**
 | 
			
		||||
         * Simbol {@code <}.
 | 
			
		||||
         */
 | 
			
		||||
        LTH,
 | 
			
		||||
        /**
 | 
			
		||||
         * Simbol {@code >=}.
 | 
			
		||||
         */
 | 
			
		||||
        GEQ,
 | 
			
		||||
        /**
 | 
			
		||||
         * Simbol {@code <=}.
 | 
			
		||||
         */
 | 
			
		||||
        LEQ,
 | 
			
		||||
        /**
 | 
			
		||||
         * Simbol {@code +}.
 | 
			
		||||
         */
 | 
			
		||||
        ADD,
 | 
			
		||||
        /**
 | 
			
		||||
         * Simbol {@code -}.
 | 
			
		||||
         */
 | 
			
		||||
        SUB,
 | 
			
		||||
        /**
 | 
			
		||||
         * Simbol {@code *}.
 | 
			
		||||
         */
 | 
			
		||||
        MUL,
 | 
			
		||||
        /**
 | 
			
		||||
         * Simbol {@code /}.
 | 
			
		||||
         */
 | 
			
		||||
        DIV,
 | 
			
		||||
        /**
 | 
			
		||||
         * Simbol {@code %}.
 | 
			
		||||
         */
 | 
			
		||||
        MOD,
 | 
			
		||||
        /**
 | 
			
		||||
         * Simbol {@code ^}.
 | 
			
		||||
         */
 | 
			
		||||
        PTR,
 | 
			
		||||
        /**
 | 
			
		||||
         * Simbol {@code (}.
 | 
			
		||||
         */
 | 
			
		||||
        LPAREN,
 | 
			
		||||
        /**
 | 
			
		||||
         * Simbol {@code )}.
 | 
			
		||||
         */
 | 
			
		||||
        RPAREN,
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
	@Override
 | 
			
		||||
	public String toString() {
 | 
			
		||||
		String lexeme = switch (symbol) {
 | 
			
		||||
		case INTCONST -> "(" + this.lexeme + ")";
 | 
			
		||||
		case CHARCONST -> "(" + this.lexeme + ")";
 | 
			
		||||
		case STRINGCONST -> "(" + this.lexeme + ")";
 | 
			
		||||
		case IDENTIFIER -> "(" + this.lexeme + ")";
 | 
			
		||||
		default -> "";
 | 
			
		||||
		};
 | 
			
		||||
		return location + " " + symbol + lexeme;
 | 
			
		||||
	}
 | 
			
		||||
    @Override
 | 
			
		||||
    public String toString() {
 | 
			
		||||
        String lexeme = switch (symbol) {
 | 
			
		||||
            case INTCONST -> "(" + this.lexeme + ")";
 | 
			
		||||
            case CHARCONST -> "(" + this.lexeme + ")";
 | 
			
		||||
            case STRINGCONST -> "(" + this.lexeme + ")";
 | 
			
		||||
            case IDENTIFIER -> "(" + this.lexeme + ")";
 | 
			
		||||
            default -> "";
 | 
			
		||||
        };
 | 
			
		||||
        return location + " " + symbol + lexeme;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
@ -92,7 +92,7 @@ public class LexAn implements AutoCloseable {
 | 
			
		||||
                    return;
 | 
			
		||||
                case '\t': // Prejsnji znak je tabulator, ta znak je morda potisnjen v desno.
 | 
			
		||||
                    buffChar = srcFile.read();
 | 
			
		||||
                    while (buffCharColumn % 8 != 0)
 | 
			
		||||
                    while (buffCharColumn % 4 != 0)
 | 
			
		||||
                        buffCharColumn += 1;
 | 
			
		||||
                    buffCharColumn += 1;
 | 
			
		||||
                    return;
 | 
			
		||||
@ -130,7 +130,7 @@ public class LexAn implements AutoCloseable {
 | 
			
		||||
        Report.Location start = currentLocation();
 | 
			
		||||
        switch (buffChar) {
 | 
			
		||||
            case -1: // EOF
 | 
			
		||||
                buffToken = new Token(currentLocation(), Token.Symbol.EOF, null);
 | 
			
		||||
                buffToken = new Token(new Report.Location(0, 0), Token.Symbol.EOF, null);
 | 
			
		||||
                return;
 | 
			
		||||
 | 
			
		||||
            case '\'':
 | 
			
		||||
@ -154,7 +154,6 @@ public class LexAn implements AutoCloseable {
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                buffToken = new Token(start, Token.Symbol.ASSIGN, null);
 | 
			
		||||
                nextChar();
 | 
			
		||||
                return;
 | 
			
		||||
 | 
			
		||||
            case ',':
 | 
			
		||||
@ -241,8 +240,17 @@ public class LexAn implements AutoCloseable {
 | 
			
		||||
                return;
 | 
			
		||||
 | 
			
		||||
            case '/':
 | 
			
		||||
                buffToken = new Token(currentLocation(), Token.Symbol.DIV, null);
 | 
			
		||||
                nextChar();
 | 
			
		||||
                if (buffChar != '/') {
 | 
			
		||||
                    buffToken = new Token(currentLocation(), Token.Symbol.DIV, null);
 | 
			
		||||
                    return;
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                while (buffChar != '\n') {
 | 
			
		||||
                    nextChar();
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                nextToken();
 | 
			
		||||
                return;
 | 
			
		||||
 | 
			
		||||
            case '%':
 | 
			
		||||
@ -266,15 +274,19 @@ public class LexAn implements AutoCloseable {
 | 
			
		||||
                return;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (buffChar >= '0' && buffChar <= '9') {
 | 
			
		||||
        if (isNumeric()) {
 | 
			
		||||
            intConst();
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
        if (isAlpha()) {
 | 
			
		||||
            identifier();
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        throw new Report.Error(currentLocation(), "Unrecognized character '" + (char) buffChar + "'.");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    private boolean isDigit() {
 | 
			
		||||
    private boolean isNumeric() {
 | 
			
		||||
        return buffChar >= '0' && buffChar <= '9';
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@ -286,6 +298,30 @@ public class LexAn implements AutoCloseable {
 | 
			
		||||
        return buffChar >= '0' && buffChar <= '9' || buffChar >= 'a' && buffChar <= 'f';
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    private boolean isAlpha() {
 | 
			
		||||
        return buffChar >= 'a' && buffChar <= 'z' || buffChar >= 'A' && buffChar <= 'Z' || buffChar == '_';
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    private boolean isAlphaNumeric() {
 | 
			
		||||
        return buffChar >= 'a' && buffChar <= 'z' || buffChar >= 'A' && buffChar <= 'Z' || buffChar >= '0' && buffChar <= '9' || buffChar == '_';
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    private Token.Symbol getReservedWordSymbol(String word) {
 | 
			
		||||
        return switch (word) {
 | 
			
		||||
            case "fun" -> Token.Symbol.FUN;
 | 
			
		||||
            case "var" -> Token.Symbol.VAR;
 | 
			
		||||
            case "if" -> Token.Symbol.IF;
 | 
			
		||||
            case "then" -> Token.Symbol.THEN;
 | 
			
		||||
            case "else" -> Token.Symbol.ELSE;
 | 
			
		||||
            case "while" -> Token.Symbol.WHILE;
 | 
			
		||||
            case "do" -> Token.Symbol.DO;
 | 
			
		||||
            case "let" -> Token.Symbol.LET;
 | 
			
		||||
            case "in" -> Token.Symbol.IN;
 | 
			
		||||
            case "end" -> Token.Symbol.END;
 | 
			
		||||
            default -> null;
 | 
			
		||||
        };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    private void intConst() {
 | 
			
		||||
        Report.Location startLocation = currentLocation();
 | 
			
		||||
        Report.Location endLocation = currentLocation();
 | 
			
		||||
@ -294,11 +330,11 @@ public class LexAn implements AutoCloseable {
 | 
			
		||||
        if (buffChar == '0') {
 | 
			
		||||
            lexeme.append((char) buffChar);
 | 
			
		||||
            nextChar();
 | 
			
		||||
            if (isDigit()) {
 | 
			
		||||
            if (isNumeric()) {
 | 
			
		||||
                throw new Report.Error(startLocation, "Leading zero is not allowed.");
 | 
			
		||||
            }
 | 
			
		||||
        } else {
 | 
			
		||||
            while (isDigit()) {
 | 
			
		||||
            while (isNumeric()) {
 | 
			
		||||
                lexeme.append((char) buffChar);
 | 
			
		||||
                endLocation = currentLocation();
 | 
			
		||||
                nextChar();
 | 
			
		||||
@ -384,6 +420,29 @@ public class LexAn implements AutoCloseable {
 | 
			
		||||
        nextChar();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    private void identifier() {
 | 
			
		||||
        Report.Location startLocation = currentLocation();
 | 
			
		||||
        Report.Location endLocation = currentLocation();
 | 
			
		||||
        StringBuilder lexeme = new StringBuilder();
 | 
			
		||||
 | 
			
		||||
        while (isAlphaNumeric()) {
 | 
			
		||||
            lexeme.append((char) buffChar);
 | 
			
		||||
            endLocation = currentLocation();
 | 
			
		||||
            nextChar();
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        Token.Symbol symbol = getReservedWordSymbol(lexeme.toString());
 | 
			
		||||
        if (symbol == null) {
 | 
			
		||||
            symbol = Token.Symbol.IDENTIFIER;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        buffToken = new Token(
 | 
			
		||||
                new Report.Location(startLocation, endLocation),
 | 
			
		||||
                symbol,
 | 
			
		||||
                lexeme.toString()
 | 
			
		||||
        );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Vrne trenutni leksikalni simbol, ki ostane v lastnistvu leksikalnega
 | 
			
		||||
     * analizatorja.
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user