WIP SynAn

This commit is contained in:
Gašper Dobrovoljc 2025-03-18 19:24:19 +01:00
parent ea5b857272
commit b1e61398a6
No known key found for this signature in database
GPG Key ID: 0E7E037018CFA5A5
6 changed files with 418 additions and 191 deletions

62
grammar.txt Normal file
View File

@ -0,0 +1,62 @@
program -> definition restdefs .
restdefs -> definition restdefs | .
definition -> fun IDENTIFIER LPAREN parameters RPAREN funcassign .
funcassign -> ASSIGN statements | .
definition -> var IDENTIFIER ASSIGN initializers .
parameters -> IDENTIFIER restparams | .
restparams -> COMMA parameters | .
statements -> statement reststmts .
reststmts -> COMMA statements | .
statement -> expression exprassign .
exprassign -> ASSIGN expression | .
statement -> if expression then statements elsestmt end .
elsestmt -> else statements | .
statement -> while expression do statements end .
statement -> let definition reststmtdefs in statements end .
reststmtdefs -> definition reststmtdefs | .
expression -> conjexpr restdisj .
restdisj -> OR expression | .
conjexpr -> cmpexpr restconj .
restconj -> AND conjexpr | .
cmpexpr -> addexpr restcmp .
restcmp -> cmpop cmpexpr | .
cmpop -> EQU | NEQ | LTH | GTH | LEQ | GEQ .
addexpr -> multexpr restadd .
restadd -> addop addexpr | .
addop -> ADD | SUB .
multexpr -> prefixexpr restmult .
restmult -> multop multexpr | .
multop -> MUL | DIV | MOD .
prefixexpr -> prefixop postfixexpr .
prefixop -> NOT prefixop | ADD prefixop | SUB prefixop | PTR prefixop | .
postfixexpr -> primary postfixop .
postfixop -> PTR postfixop | .
primary -> const | LPAREN expression RPAREN | IDENTIFIER exprargs .
exprargs -> LPAREN arguments RPAREN | .
arguments -> expression restargs | .
restargs -> COMMA arguments | .
initializers -> initializer restinits | .
restinits -> COMMA initializers | .
initializer -> INTCONST intconstmul | CHARCONST | STRINGCONST .
intconstmul -> MUL const | .
const -> INTCONST | CHARCONST | STRINGCONST .

View File

@ -2,5 +2,5 @@ JAVA = java --enable-preview
.PHONY : % .PHONY : %
% : %.pins25 % : %.pins25
$(JAVA) -classpath ../bin pins25.phase.LexAn $< $(JAVA) -classpath ../bin pins25.phase.SynAn $<

View File

@ -1,16 +1,2 @@
1234567890 fun test(a, b,,) =
= , && || ! == != > < >= <= + - * / % ^ ( ) var c = 0
=,&&||!==!=><>=<=+-*/%^()
_a_0_ A0312 asd312 a_132asdnmznkjNAJDKNSJKDF
fun var if then else while do let in end
_fun_ _var_ _if_ _then_ _else_ _while_ _do_ _let_ _in_ _end_
infun invar inif inthen inelse inwhile indo inlet inin inend
funin varin ifin thenin elsein whilein doin letin endin
// == * 1248129 d8as90d8as9d asdkal //
1234567890='a' // 1+2
-1/asd*'x'+""
'a' '\'' '\\' '\0a' "\\ 1231 \n \ff _-ads"
"BESEDNJAKKKK" "0\" 0123" 0=XYZ 123+3+2+1(SSS) 4_bese
dica beseder_3456

View File

@ -1,7 +1,5 @@
package pins25.common; package pins25.common;
import java.util.HashMap;
/** /**
* Leksikalni simbol. * Leksikalni simbol.
* *
@ -11,158 +9,90 @@ import java.util.HashMap;
*/ */
public record Token(Report.Location location, Symbol symbol, String lexeme) implements Report.Locatable { public record Token(Report.Location location, Symbol symbol, String lexeme) implements Report.Locatable {
/** /**
* Vrste leksikalnih simbolov. * Vrste leksikalnih simbolov.
*/ */
public enum Symbol { public enum Symbol {
/** /** Konec datoteke. */
* Konec datoteke. EOF,
*/ /** Stevilo. */
EOF, INTCONST,
/** /** Znak. */
* Stevilo. CHARCONST,
*/ /** Niz znakov. */
INTCONST, STRINGCONST,
/** /** Ime. */
* Znak. IDENTIFIER,
*/ /** Kljucna beseda {@code fun}. */
CHARCONST, FUN,
/** /** Kljucna beseda {@code var}. */
* Niz znakov. VAR,
*/ /** Kljucna beseda {@code if}. */
STRINGCONST, IF,
/** /** Kljucna beseda {@code then}. */
* Ime. THEN,
*/ /** Kljucna beseda {@code else}. */
IDENTIFIER, ELSE,
/** /** Kljucna beseda {@code while}. */
* Kljucna beseda {@code fun}. WHILE,
*/ /** Kljucna beseda {@code do}. */
FUN, DO,
/** /** Kljucna beseda {@code let}. */
* Kljucna beseda {@code var}. LET,
*/ /** Kljucna beseda {@code in}. */
VAR, IN,
/** /** Kljucna beseda {@code end}. */
* Kljucna beseda {@code if}. END,
*/ /** Simbol {@code =}. */
IF, ASSIGN,
/** /** Simbol {@code ,}. */
* Kljucna beseda {@code then}. COMMA,
*/ /** Simbol {@code &&}. */
THEN, AND,
/** /** Simbol {@code ||}. */
* Kljucna beseda {@code else}. OR,
*/ /** Simbol {@code !}. */
ELSE, NOT,
/** /** Simbol {@code ==}. */
* Kljucna beseda {@code while}. EQU,
*/ /** Simbol {@code !=}. */
WHILE, NEQ,
/** /** Simbol {@code >}. */
* Kljucna beseda {@code do}. GTH,
*/ /** Simbol {@code <}. */
DO, LTH,
/** /** Simbol {@code >=}. */
* Kljucna beseda {@code let}. GEQ,
*/ /** Simbol {@code <=}. */
LET, LEQ,
/** /** Simbol {@code +}. */
* Kljucna beseda {@code in}. ADD,
*/ /** Simbol {@code -}. */
IN, SUB,
/** /** Simbol {@code *}. */
* Kljucna beseda {@code end}. MUL,
*/ /** Simbol {@code /}. */
END, DIV,
/** /** Simbol {@code %}. */
* Simbol {@code =}. MOD,
*/ /** Simbol {@code ^}. */
ASSIGN, PTR,
/** /** Simbol {@code (}. */
* Simbol {@code ,}. LPAREN,
*/ /** Simbol {@code )}. */
COMMA, RPAREN,
/** }
* Simbol {@code &&}.
*/
AND,
/**
* Simbol {@code ||}.
*/
OR,
/**
* Simbol {@code !}.
*/
NOT,
/**
* Simbol {@code ==}.
*/
EQU,
/**
* Simbol {@code !=}.
*/
NEQ,
/**
* Simbol {@code >}.
*/
GTH,
/**
* Simbol {@code <}.
*/
LTH,
/**
* Simbol {@code >=}.
*/
GEQ,
/**
* Simbol {@code <=}.
*/
LEQ,
/**
* Simbol {@code +}.
*/
ADD,
/**
* Simbol {@code -}.
*/
SUB,
/**
* Simbol {@code *}.
*/
MUL,
/**
* Simbol {@code /}.
*/
DIV,
/**
* Simbol {@code %}.
*/
MOD,
/**
* Simbol {@code ^}.
*/
PTR,
/**
* Simbol {@code (}.
*/
LPAREN,
/**
* Simbol {@code )}.
*/
RPAREN,
}
@Override @Override
public String toString() { public String toString() {
String lexeme = switch (symbol) { String lexeme = switch (symbol) {
case INTCONST -> "(" + this.lexeme + ")"; case INTCONST -> "(" + this.lexeme + ")";
case CHARCONST -> "(" + this.lexeme + ")"; case CHARCONST -> "(" + this.lexeme + ")";
case STRINGCONST -> "(" + this.lexeme + ")"; case STRINGCONST -> "(" + this.lexeme + ")";
case IDENTIFIER -> "(" + this.lexeme + ")"; case IDENTIFIER -> "(" + this.lexeme + ")";
default -> ""; default -> "";
}; };
return location + " " + symbol + lexeme; return location + " " + symbol + lexeme;
} }
} }

View File

@ -147,17 +147,17 @@ public class LexAn implements AutoCloseable {
buffToken = new Token( buffToken = new Token(
new Report.Location(start, currentLocation()), new Report.Location(start, currentLocation()),
Token.Symbol.EQU, Token.Symbol.EQU,
null "=="
); );
nextChar(); nextChar();
return; return;
} }
buffToken = new Token(start, Token.Symbol.ASSIGN, null); buffToken = new Token(start, Token.Symbol.ASSIGN, "=");
return; return;
case ',': case ',':
buffToken = new Token(currentLocation(), Token.Symbol.COMMA, null); buffToken = new Token(currentLocation(), Token.Symbol.COMMA, ",");
nextChar(); nextChar();
return; return;
@ -166,7 +166,7 @@ public class LexAn implements AutoCloseable {
if (buffChar != '&') { if (buffChar != '&') {
throw new Report.Error(currentLocation(), "Invalid character '" + (char) buffChar + "'"); throw new Report.Error(currentLocation(), "Invalid character '" + (char) buffChar + "'");
} }
buffToken = new Token(new Report.Location(start, currentLocation()), Token.Symbol.AND, null); buffToken = new Token(new Report.Location(start, currentLocation()), Token.Symbol.AND, "&&");
nextChar(); nextChar();
return; return;
@ -175,7 +175,7 @@ public class LexAn implements AutoCloseable {
if (buffChar != '|') { if (buffChar != '|') {
throw new Report.Error(currentLocation(), "Invalid character '" + (char) buffChar + "'"); throw new Report.Error(currentLocation(), "Invalid character '" + (char) buffChar + "'");
} }
buffToken = new Token(new Report.Location(start, currentLocation()), Token.Symbol.OR, null); buffToken = new Token(new Report.Location(start, currentLocation()), Token.Symbol.OR, "||");
nextChar(); nextChar();
return; return;
@ -185,13 +185,13 @@ public class LexAn implements AutoCloseable {
buffToken = new Token( buffToken = new Token(
new Report.Location(start, currentLocation()), new Report.Location(start, currentLocation()),
Token.Symbol.NEQ, Token.Symbol.NEQ,
null "!="
); );
nextChar(); nextChar();
return; return;
} }
buffToken = new Token(start, Token.Symbol.NOT, null); buffToken = new Token(start, Token.Symbol.NOT, "!");
return; return;
case '>': case '>':
@ -200,13 +200,13 @@ public class LexAn implements AutoCloseable {
buffToken = new Token( buffToken = new Token(
new Report.Location(start, currentLocation()), new Report.Location(start, currentLocation()),
Token.Symbol.GEQ, Token.Symbol.GEQ,
null ">="
); );
nextChar(); nextChar();
return; return;
} }
buffToken = new Token(start, Token.Symbol.GTH, null); buffToken = new Token(start, Token.Symbol.GTH, ">");
return; return;
case '<': case '<':
@ -215,27 +215,27 @@ public class LexAn implements AutoCloseable {
buffToken = new Token( buffToken = new Token(
new Report.Location(start, currentLocation()), new Report.Location(start, currentLocation()),
Token.Symbol.LEQ, Token.Symbol.LEQ,
null "<="
); );
nextChar(); nextChar();
return; return;
} }
buffToken = new Token(start, Token.Symbol.LTH, null); buffToken = new Token(start, Token.Symbol.LTH, "<");
return; return;
case '+': case '+':
buffToken = new Token(currentLocation(), Token.Symbol.ADD, null); buffToken = new Token(currentLocation(), Token.Symbol.ADD, "+");
nextChar(); nextChar();
return; return;
case '-': case '-':
buffToken = new Token(currentLocation(), Token.Symbol.SUB, null); buffToken = new Token(currentLocation(), Token.Symbol.SUB, "-");
nextChar(); nextChar();
return; return;
case '*': case '*':
buffToken = new Token(currentLocation(), Token.Symbol.MUL, null); buffToken = new Token(currentLocation(), Token.Symbol.MUL, "*");
nextChar(); nextChar();
return; return;
@ -243,7 +243,7 @@ public class LexAn implements AutoCloseable {
start = currentLocation(); start = currentLocation();
nextChar(); nextChar();
if (buffChar != '/') { if (buffChar != '/') {
buffToken = new Token(start, Token.Symbol.DIV, null); buffToken = new Token(start, Token.Symbol.DIV, "/");
return; return;
} }
@ -255,22 +255,22 @@ public class LexAn implements AutoCloseable {
return; return;
case '%': case '%':
buffToken = new Token(currentLocation(), Token.Symbol.MOD, null); buffToken = new Token(currentLocation(), Token.Symbol.MOD, "%");
nextChar(); nextChar();
return; return;
case '^': case '^':
buffToken = new Token(currentLocation(), Token.Symbol.PTR, null); buffToken = new Token(currentLocation(), Token.Symbol.PTR, "^");
nextChar(); nextChar();
return; return;
case '(': case '(':
buffToken = new Token(currentLocation(), Token.Symbol.LPAREN, null); buffToken = new Token(currentLocation(), Token.Symbol.LPAREN, "(");
nextChar(); nextChar();
return; return;
case ')': case ')':
buffToken = new Token(currentLocation(), Token.Symbol.RPAREN, null); buffToken = new Token(currentLocation(), Token.Symbol.RPAREN, ")");
nextChar(); nextChar();
return; return;
} }

249
src/pins25/phase/SynAn.java Normal file
View File

@ -0,0 +1,249 @@
package pins25.phase;
import pins25.common.*;
import java.util.Objects;
/**
* Sintaksni analizator.
*/
public class SynAn implements AutoCloseable {
/**
* Leksikalni analizator.
*/
private final LexAn lexAn;
/**
* Ustvari nov sintaksni analizator.
*
* @param srcFileName Ime izvorne datoteke.
*/
public SynAn(final String srcFileName) {
this.lexAn = new LexAn(srcFileName);
}
@Override
public void close() {
lexAn.close();
}
/**
* Prevzame leksikalni analizator od leksikalnega analizatorja in preveri, ali
* je prave vrste.
*
* @param symbol Pričakovana vrsta leksikalnega simbola.
* @return Prevzeti leksikalni simbol.
*/
private Token check(Token.Symbol symbol) {
final Token token = lexAn.takeToken();
if (token.symbol() != symbol)
throw new Report.Error(token, "Unexpected symbol '" + token.lexeme() + "', expected " + symbol + ".");
return token;
}
/**
* Opravi sintaksno analizo.
*/
public void parse() {
parseProgram();
if (lexAn.peekToken().symbol() != Token.Symbol.EOF)
Report.warning(lexAn.peekToken(),
"Unexpected text '" + lexAn.peekToken().lexeme() + "...' at the end of the program.");
}
private void parseProgram() {
parseDefinition();
parseRestDefinitions();
}
private void parseRestDefinitions() {
if (lexAn.peekToken().symbol() != Token.Symbol.EOF) {
parseDefinition();
parseRestDefinitions();
}
}
private void parseDefinition() {
Token token = lexAn.peekToken();
switch (token.symbol()) {
case Token.Symbol.FUN:
check(Token.Symbol.FUN);
check(Token.Symbol.IDENTIFIER);
check(Token.Symbol.LPAREN);
parseParameters();
check(Token.Symbol.RPAREN);
parseFunctionAssignment();
break;
case Token.Symbol.VAR:
check(Token.Symbol.VAR);
check(Token.Symbol.IDENTIFIER);
check(Token.Symbol.ASSIGN);
parseInitializers();
break;
default:
throw new Report.Error(token, "Unexpected symbol '" + token.lexeme() + "', expected FUN or VAR.");
}
}
private void parseFunctionAssignment() {
Token token = lexAn.peekToken();
switch (token.symbol()) {
case Token.Symbol.FUN:
case Token.Symbol.VAR:
case Token.Symbol.IN:
case Token.Symbol.EOF:
break;
case Token.Symbol.ASSIGN:
check(Token.Symbol.ASSIGN);
parseStatements();
break;
default:
throw new Report.Error(token, "Unexpected symbol '" + token.lexeme() + "', expected ASSIGN.");
}
}
private void parseParameters() {
Token token = lexAn.peekToken();
switch (token.symbol()) {
case Token.Symbol.IDENTIFIER:
check(Token.Symbol.IDENTIFIER);
parseRestParameters();
break;
case Token.Symbol.RPAREN:
break;
default:
throw new Report.Error(token, "Unexpected symbol '" + token.lexeme() + "', expected IDENTIFIER or RPAREN.");
}
}
private void parseRestParameters() {
Token token = lexAn.peekToken();
switch (token.symbol()) {
case Token.Symbol.RPAREN:
break;
case Token.Symbol.COMMA:
check(Token.Symbol.COMMA);
check(Token.Symbol.IDENTIFIER);
parseRestParameters();
break;
default:
throw new Report.Error(token, "Unexpected symbol '" + token.lexeme() + "', expected ')' or ','.");
}
}
private void parseStatements() {
parseStatement();
parseRestStatements();
}
private void parseStatement() {
Token token = lexAn.peekToken();
switch (token.symbol()) {
case Token.Symbol.IDENTIFIER:
case Token.Symbol.LPAREN:
case Token.Symbol.ADD:
case Token.Symbol.SUB:
case Token.Symbol.NOT:
case Token.Symbol.PTR:
case Token.Symbol.INTCONST:
case Token.Symbol.CHARCONST:
case Token.Symbol.STRINGCONST:
parseExpression();
parseExpressionAssign();
break;
case Token.Symbol.IF:
check(Token.Symbol.IF);
parseExpression();
check(Token.Symbol.THEN);
parseStatements();
parseElseStatement();
check(Token.Symbol.END);
break;
case Token.Symbol.WHILE:
check(Token.Symbol.WHILE);
parseExpression();
check(Token.Symbol.DO);
parseStatements();
check(Token.Symbol.END);
break;
case Token.Symbol.LET:
check(Token.Symbol.LET);
parseDefinition();
parseRestStatementDefinitions();
check(Token.Symbol.IN);
parseStatements();
check(Token.Symbol.END);
break;
}
}
private void parseRestStatements() {
if (lexAn.peekToken().symbol() == Token.Symbol.COMMA) {
check(Token.Symbol.COMMA);
parseStatements();
}
}
private void parseRestStatementDefinitions() {
}
private void parseInitializers() {
}
private void parseExpression() {
}
private void parseExpressionAssign() {
}
private void parseElseStatement() {
}
// --- ZAGON ---
/**
* Zagon sintaksnega analizatorja kot samostojnega programa.
*
* @param cmdLineArgs Argumenti v ukazni vrstici.
*/
public static void main(final String[] cmdLineArgs) {
System.out.println("This is PINS'25 compiler (syntax analysis):");
try {
if (cmdLineArgs.length == 0)
throw new Report.Error("No source file specified in the command line.");
if (cmdLineArgs.length > 1)
Report.warning("Unused arguments in the command line.");
try (SynAn synAn = new SynAn(cmdLineArgs[0])) {
synAn.parse();
}
// Upajmo, da kdaj pridemo to te tocke.
// A zavedajmo se sledecega:
// 1. Prevod je zaradi napak v programu lahko napacen :-o
// 2. Izvorni program se zdalec ni tisto, kar je programer hotel, da bi bil ;-)
Report.info("Done.");
} catch (Report.Error error) {
// Izpis opisa napake.
System.err.println(error.getMessage());
System.exit(1);
}
}
}