WIP SynAn

This commit is contained in:
Gašper Dobrovoljc 2025-03-18 19:24:19 +01:00
parent ea5b857272
commit b1e61398a6
No known key found for this signature in database
GPG Key ID: 0E7E037018CFA5A5
6 changed files with 418 additions and 191 deletions

62
grammar.txt Normal file
View File

@ -0,0 +1,62 @@
program -> definition restdefs .
restdefs -> definition restdefs | .
definition -> fun IDENTIFIER LPAREN parameters RPAREN funcassign .
funcassign -> ASSIGN statements | .
definition -> var IDENTIFIER ASSIGN initializers .
parameters -> IDENTIFIER restparams | .
restparams -> COMMA parameters | .
statements -> statement reststmts .
reststmts -> COMMA statements | .
statement -> expression exprassign .
exprassign -> ASSIGN expression | .
statement -> if expression then statements elsestmt end .
elsestmt -> else statements | .
statement -> while expression do statements end .
statement -> let definition reststmtdefs in statements end .
reststmtdefs -> definition reststmtdefs | .
expression -> conjexpr restdisj .
restdisj -> OR expression | .
conjexpr -> cmpexpr restconj .
restconj -> AND conjexpr | .
cmpexpr -> addexpr restcmp .
restcmp -> cmpop cmpexpr | .
cmpop -> EQU | NEQ | LTH | GTH | LEQ | GEQ .
addexpr -> multexpr restadd .
restadd -> addop addexpr | .
addop -> ADD | SUB .
multexpr -> prefixexpr restmult .
restmult -> multop multexpr | .
multop -> MUL | DIV | MOD .
prefixexpr -> prefixop postfixexpr .
prefixop -> NOT prefixop | ADD prefixop | SUB prefixop | PTR prefixop | .
postfixexpr -> primary postfixop .
postfixop -> PTR postfixop | .
primary -> const | LPAREN expression RPAREN | IDENTIFIER exprargs .
exprargs -> LPAREN arguments RPAREN | .
arguments -> expression restargs | .
restargs -> COMMA expression restargs | .
initializers -> initializer restinits | .
restinits -> COMMA initializers | .
initializer -> INTCONST intconstmul | CHARCONST | STRINGCONST .
intconstmul -> MUL const | .
const -> INTCONST | CHARCONST | STRINGCONST .

View File

@ -2,5 +2,5 @@ JAVA = java --enable-preview
.PHONY : %
% : %.pins25
$(JAVA) -classpath ../bin pins25.phase.LexAn $<
$(JAVA) -classpath ../bin pins25.phase.SynAn $<

View File

@ -1,16 +1,2 @@
1234567890
= , && || ! == != > < >= <= + - * / % ^ ( )
=,&&||!==!=><>=<=+-*/%^()
_a_0_ A0312 asd312 a_132asdnmznkjNAJDKNSJKDF
fun var if then else while do let in end
_fun_ _var_ _if_ _then_ _else_ _while_ _do_ _let_ _in_ _end_
infun invar inif inthen inelse inwhile indo inlet inin inend
funin varin ifin thenin elsein whilein doin letin endin
// == * 1248129 d8as90d8as9d asdkal //
1234567890='a' // 1+2
-1/asd*'x'+""
'a' '\'' '\\' '\0a' "\\ 1231 \n \ff _-ads"
"BESEDNJAKKKK" "0\" 0123" 0=XYZ 123+3+2+1(SSS) 4_bese
dica beseder_3456
fun test(a, b,,) =
var c = 0

View File

@ -1,7 +1,5 @@
package pins25.common;
import java.util.HashMap;
/**
* Leksikalni simbol.
*
@ -15,141 +13,73 @@ public record Token(Report.Location location, Symbol symbol, String lexeme) impl
* Vrste leksikalnih simbolov.
*/
public enum Symbol {
/**
* Konec datoteke.
*/
/** Konec datoteke. */
EOF,
/**
* Stevilo.
*/
/** Stevilo. */
INTCONST,
/**
* Znak.
*/
/** Znak. */
CHARCONST,
/**
* Niz znakov.
*/
/** Niz znakov. */
STRINGCONST,
/**
* Ime.
*/
/** Ime. */
IDENTIFIER,
/**
* Kljucna beseda {@code fun}.
*/
/** Kljucna beseda {@code fun}. */
FUN,
/**
* Kljucna beseda {@code var}.
*/
/** Kljucna beseda {@code var}. */
VAR,
/**
* Kljucna beseda {@code if}.
*/
/** Kljucna beseda {@code if}. */
IF,
/**
* Kljucna beseda {@code then}.
*/
/** Kljucna beseda {@code then}. */
THEN,
/**
* Kljucna beseda {@code else}.
*/
/** Kljucna beseda {@code else}. */
ELSE,
/**
* Kljucna beseda {@code while}.
*/
/** Kljucna beseda {@code while}. */
WHILE,
/**
* Kljucna beseda {@code do}.
*/
/** Kljucna beseda {@code do}. */
DO,
/**
* Kljucna beseda {@code let}.
*/
/** Kljucna beseda {@code let}. */
LET,
/**
* Kljucna beseda {@code in}.
*/
/** Kljucna beseda {@code in}. */
IN,
/**
* Kljucna beseda {@code end}.
*/
/** Kljucna beseda {@code end}. */
END,
/**
* Simbol {@code =}.
*/
/** Simbol {@code =}. */
ASSIGN,
/**
* Simbol {@code ,}.
*/
/** Simbol {@code ,}. */
COMMA,
/**
* Simbol {@code &&}.
*/
/** Simbol {@code &&}. */
AND,
/**
* Simbol {@code ||}.
*/
/** Simbol {@code ||}. */
OR,
/**
* Simbol {@code !}.
*/
/** Simbol {@code !}. */
NOT,
/**
* Simbol {@code ==}.
*/
/** Simbol {@code ==}. */
EQU,
/**
* Simbol {@code !=}.
*/
/** Simbol {@code !=}. */
NEQ,
/**
* Simbol {@code >}.
*/
/** Simbol {@code >}. */
GTH,
/**
* Simbol {@code <}.
*/
/** Simbol {@code <}. */
LTH,
/**
* Simbol {@code >=}.
*/
/** Simbol {@code >=}. */
GEQ,
/**
* Simbol {@code <=}.
*/
/** Simbol {@code <=}. */
LEQ,
/**
* Simbol {@code +}.
*/
/** Simbol {@code +}. */
ADD,
/**
* Simbol {@code -}.
*/
/** Simbol {@code -}. */
SUB,
/**
* Simbol {@code *}.
*/
/** Simbol {@code *}. */
MUL,
/**
* Simbol {@code /}.
*/
/** Simbol {@code /}. */
DIV,
/**
* Simbol {@code %}.
*/
/** Simbol {@code %}. */
MOD,
/**
* Simbol {@code ^}.
*/
/** Simbol {@code ^}. */
PTR,
/**
* Simbol {@code (}.
*/
/** Simbol {@code (}. */
LPAREN,
/**
* Simbol {@code )}.
*/
/** Simbol {@code )}. */
RPAREN,
}

View File

@ -147,17 +147,17 @@ public class LexAn implements AutoCloseable {
buffToken = new Token(
new Report.Location(start, currentLocation()),
Token.Symbol.EQU,
null
"=="
);
nextChar();
return;
}
buffToken = new Token(start, Token.Symbol.ASSIGN, null);
buffToken = new Token(start, Token.Symbol.ASSIGN, "=");
return;
case ',':
buffToken = new Token(currentLocation(), Token.Symbol.COMMA, null);
buffToken = new Token(currentLocation(), Token.Symbol.COMMA, ",");
nextChar();
return;
@ -166,7 +166,7 @@ public class LexAn implements AutoCloseable {
if (buffChar != '&') {
throw new Report.Error(currentLocation(), "Invalid character '" + (char) buffChar + "'");
}
buffToken = new Token(new Report.Location(start, currentLocation()), Token.Symbol.AND, null);
buffToken = new Token(new Report.Location(start, currentLocation()), Token.Symbol.AND, "&&");
nextChar();
return;
@ -175,7 +175,7 @@ public class LexAn implements AutoCloseable {
if (buffChar != '|') {
throw new Report.Error(currentLocation(), "Invalid character '" + (char) buffChar + "'");
}
buffToken = new Token(new Report.Location(start, currentLocation()), Token.Symbol.OR, null);
buffToken = new Token(new Report.Location(start, currentLocation()), Token.Symbol.OR, "||");
nextChar();
return;
@ -185,13 +185,13 @@ public class LexAn implements AutoCloseable {
buffToken = new Token(
new Report.Location(start, currentLocation()),
Token.Symbol.NEQ,
null
"!="
);
nextChar();
return;
}
buffToken = new Token(start, Token.Symbol.NOT, null);
buffToken = new Token(start, Token.Symbol.NOT, "!");
return;
case '>':
@ -200,13 +200,13 @@ public class LexAn implements AutoCloseable {
buffToken = new Token(
new Report.Location(start, currentLocation()),
Token.Symbol.GEQ,
null
">="
);
nextChar();
return;
}
buffToken = new Token(start, Token.Symbol.GTH, null);
buffToken = new Token(start, Token.Symbol.GTH, ">");
return;
case '<':
@ -215,27 +215,27 @@ public class LexAn implements AutoCloseable {
buffToken = new Token(
new Report.Location(start, currentLocation()),
Token.Symbol.LEQ,
null
"<="
);
nextChar();
return;
}
buffToken = new Token(start, Token.Symbol.LTH, null);
buffToken = new Token(start, Token.Symbol.LTH, "<");
return;
case '+':
buffToken = new Token(currentLocation(), Token.Symbol.ADD, null);
buffToken = new Token(currentLocation(), Token.Symbol.ADD, "+");
nextChar();
return;
case '-':
buffToken = new Token(currentLocation(), Token.Symbol.SUB, null);
buffToken = new Token(currentLocation(), Token.Symbol.SUB, "-");
nextChar();
return;
case '*':
buffToken = new Token(currentLocation(), Token.Symbol.MUL, null);
buffToken = new Token(currentLocation(), Token.Symbol.MUL, "*");
nextChar();
return;
@ -243,7 +243,7 @@ public class LexAn implements AutoCloseable {
start = currentLocation();
nextChar();
if (buffChar != '/') {
buffToken = new Token(start, Token.Symbol.DIV, null);
buffToken = new Token(start, Token.Symbol.DIV, "/");
return;
}
@ -255,22 +255,22 @@ public class LexAn implements AutoCloseable {
return;
case '%':
buffToken = new Token(currentLocation(), Token.Symbol.MOD, null);
buffToken = new Token(currentLocation(), Token.Symbol.MOD, "%");
nextChar();
return;
case '^':
buffToken = new Token(currentLocation(), Token.Symbol.PTR, null);
buffToken = new Token(currentLocation(), Token.Symbol.PTR, "^");
nextChar();
return;
case '(':
buffToken = new Token(currentLocation(), Token.Symbol.LPAREN, null);
buffToken = new Token(currentLocation(), Token.Symbol.LPAREN, "(");
nextChar();
return;
case ')':
buffToken = new Token(currentLocation(), Token.Symbol.RPAREN, null);
buffToken = new Token(currentLocation(), Token.Symbol.RPAREN, ")");
nextChar();
return;
}

249
src/pins25/phase/SynAn.java Normal file
View File

@ -0,0 +1,249 @@
package pins25.phase;
import pins25.common.*;
import java.util.Objects;
/**
 * Syntax analyzer.
 *
 * <p>A recursive-descent recognizer: each {@code parseXxx} method handles one
 * nonterminal by peeking at the next token of the lexical analyzer and then
 * consuming the tokens of the chosen production through {@link #check}.
 *
 * <p>Work in progress: {@link #parseExpression()} and
 * {@link #parseInitializers()} are still empty and consume no tokens.
 */
public class SynAn implements AutoCloseable {

    /** Lexical analyzer that supplies the tokens. */
    private final LexAn lexAn;

    /**
     * Creates a new syntax analyzer.
     *
     * @param srcFileName Name of the source file.
     */
    public SynAn(final String srcFileName) {
        this.lexAn = new LexAn(srcFileName);
    }

    /** Closes the underlying lexical analyzer. */
    @Override
    public void close() {
        lexAn.close();
    }

    /**
     * Takes the next token from the lexical analyzer and checks that it is of
     * the expected kind.
     *
     * @param symbol Expected kind of the token.
     * @return The token that was taken.
     * @throws Report.Error If the token is of a different kind.
     */
    private Token check(Token.Symbol symbol) {
        final Token token = lexAn.takeToken();
        if (token.symbol() != symbol) {
            throw new Report.Error(token, "Unexpected symbol '" + token.lexeme() + "', expected " + symbol + ".");
        }
        return token;
    }

    /**
     * Performs syntax analysis of the whole program and issues a warning if
     * any token other than the end of file remains afterwards.
     */
    public void parse() {
        parseProgram();
        if (lexAn.peekToken().symbol() != Token.Symbol.EOF) {
            Report.warning(lexAn.peekToken(),
                    "Unexpected text '" + lexAn.peekToken().lexeme() + "...' at the end of the program.");
        }
    }

    /** program -> definition restdefs . */
    private void parseProgram() {
        parseDefinition();
        parseRestDefinitions();
    }

    /**
     * restdefs -> definition restdefs | .
     *
     * <p>Keeps parsing definitions until the end of file is the next token.
     * Written as a loop so that the stack depth does not grow with the number
     * of definitions.
     */
    private void parseRestDefinitions() {
        while (lexAn.peekToken().symbol() != Token.Symbol.EOF) {
            parseDefinition();
        }
    }

    /**
     * definition -> fun IDENTIFIER LPAREN parameters RPAREN funcassign
     *             | var IDENTIFIER ASSIGN initializers .
     *
     * @throws Report.Error If the next token is neither FUN nor VAR.
     */
    private void parseDefinition() {
        Token token = lexAn.peekToken();
        switch (token.symbol()) {
            case Token.Symbol.FUN:
                check(Token.Symbol.FUN);
                check(Token.Symbol.IDENTIFIER);
                check(Token.Symbol.LPAREN);
                parseParameters();
                check(Token.Symbol.RPAREN);
                parseFunctionAssignment();
                break;
            case Token.Symbol.VAR:
                check(Token.Symbol.VAR);
                check(Token.Symbol.IDENTIFIER);
                check(Token.Symbol.ASSIGN);
                parseInitializers();
                break;
            default:
                throw new Report.Error(token, "Unexpected symbol '" + token.lexeme() + "', expected FUN or VAR.");
        }
    }

    /**
     * funcassign -> ASSIGN statements | .
     *
     * <p>The empty alternative is chosen when the next token can follow a
     * definition: another definition (FUN, VAR), IN, or the end of file.
     *
     * @throws Report.Error On any other token.
     */
    private void parseFunctionAssignment() {
        Token token = lexAn.peekToken();
        switch (token.symbol()) {
            case Token.Symbol.FUN:
            case Token.Symbol.VAR:
            case Token.Symbol.IN:
            case Token.Symbol.EOF:
                break;
            case Token.Symbol.ASSIGN:
                check(Token.Symbol.ASSIGN);
                parseStatements();
                break;
            default:
                throw new Report.Error(token, "Unexpected symbol '" + token.lexeme() + "', expected ASSIGN.");
        }
    }

    /**
     * parameters -> IDENTIFIER restparams | .
     *
     * @throws Report.Error If the next token is neither IDENTIFIER nor RPAREN.
     */
    private void parseParameters() {
        Token token = lexAn.peekToken();
        switch (token.symbol()) {
            case Token.Symbol.IDENTIFIER:
                check(Token.Symbol.IDENTIFIER);
                parseRestParameters();
                break;
            case Token.Symbol.RPAREN:
                break;
            default:
                throw new Report.Error(token, "Unexpected symbol '" + token.lexeme() + "', expected IDENTIFIER or RPAREN.");
        }
    }

    /**
     * restparams -> COMMA IDENTIFIER restparams | .
     *
     * <p>Every comma must be followed by an identifier, so a trailing or
     * doubled comma is rejected.
     *
     * @throws Report.Error If the next token is neither COMMA nor RPAREN.
     */
    private void parseRestParameters() {
        Token token = lexAn.peekToken();
        switch (token.symbol()) {
            case Token.Symbol.RPAREN:
                break;
            case Token.Symbol.COMMA:
                check(Token.Symbol.COMMA);
                check(Token.Symbol.IDENTIFIER);
                parseRestParameters();
                break;
            default:
                throw new Report.Error(token, "Unexpected symbol '" + token.lexeme() + "', expected ')' or ','.");
        }
    }

    /** statements -> statement reststmts . */
    private void parseStatements() {
        parseStatement();
        parseRestStatements();
    }

    /**
     * statement -> expression exprassign
     *            | if expression then statements elsestmt end
     *            | while expression do statements end
     *            | let definition reststmtdefs in statements end .
     *
     * @throws Report.Error If the next token cannot start a statement.
     */
    private void parseStatement() {
        Token token = lexAn.peekToken();
        switch (token.symbol()) {
            // Tokens that can start an expression.
            case Token.Symbol.IDENTIFIER:
            case Token.Symbol.LPAREN:
            case Token.Symbol.ADD:
            case Token.Symbol.SUB:
            case Token.Symbol.NOT:
            case Token.Symbol.PTR:
            case Token.Symbol.INTCONST:
            case Token.Symbol.CHARCONST:
            case Token.Symbol.STRINGCONST:
                parseExpression();
                parseExpressionAssign();
                break;
            case Token.Symbol.IF:
                check(Token.Symbol.IF);
                parseExpression();
                check(Token.Symbol.THEN);
                parseStatements();
                parseElseStatement();
                check(Token.Symbol.END);
                break;
            case Token.Symbol.WHILE:
                check(Token.Symbol.WHILE);
                parseExpression();
                check(Token.Symbol.DO);
                parseStatements();
                check(Token.Symbol.END);
                break;
            case Token.Symbol.LET:
                check(Token.Symbol.LET);
                parseDefinition();
                parseRestStatementDefinitions();
                check(Token.Symbol.IN);
                parseStatements();
                check(Token.Symbol.END);
                break;
            default:
                // A statement is never empty; without this branch input such
                // as "fun f() =" followed by the end of file was accepted.
                throw new Report.Error(token, "Unexpected symbol '" + token.lexeme() + "', expected a statement.");
        }
    }

    /**
     * reststmts -> COMMA statements | .
     *
     * <p>Parses one further statement for every comma that follows.
     */
    private void parseRestStatements() {
        while (lexAn.peekToken().symbol() == Token.Symbol.COMMA) {
            check(Token.Symbol.COMMA);
            parseStatement();
        }
    }

    /**
     * reststmtdefs -> definition reststmtdefs | .
     *
     * <p>Parses further definitions for as long as the next token starts one
     * (FUN or VAR); anything else is left for the caller, which expects IN.
     */
    private void parseRestStatementDefinitions() {
        Token.Symbol next = lexAn.peekToken().symbol();
        while (next == Token.Symbol.FUN || next == Token.Symbol.VAR) {
            parseDefinition();
            next = lexAn.peekToken().symbol();
        }
    }

    /**
     * initializers -> initializer restinits | .
     *
     * <p>TODO: not implemented yet; currently consumes no tokens.
     */
    private void parseInitializers() {
    }

    /**
     * expression -> conjexpr restdisj .
     *
     * <p>TODO: not implemented yet; currently consumes no tokens.
     */
    private void parseExpression() {
    }

    /** exprassign -> ASSIGN expression | . */
    private void parseExpressionAssign() {
        if (lexAn.peekToken().symbol() == Token.Symbol.ASSIGN) {
            check(Token.Symbol.ASSIGN);
            parseExpression();
        }
    }

    /** elsestmt -> else statements | . */
    private void parseElseStatement() {
        if (lexAn.peekToken().symbol() == Token.Symbol.ELSE) {
            check(Token.Symbol.ELSE);
            parseStatements();
        }
    }

    // --- STARTUP ---

    /**
     * Runs the syntax analyzer as a stand-alone program.
     *
     * <p>Exits with status 1 after printing the message of any
     * {@code Report.Error} raised during the run.
     *
     * @param cmdLineArgs Command-line arguments; the first one is the source
     *                    file name, any further ones are ignored with a warning.
     */
    public static void main(final String[] cmdLineArgs) {
        System.out.println("This is PINS'25 compiler (syntax analysis):");

        try {
            if (cmdLineArgs.length == 0) {
                throw new Report.Error("No source file specified in the command line.");
            }
            if (cmdLineArgs.length > 1) {
                Report.warning("Unused arguments in the command line.");
            }

            try (SynAn synAn = new SynAn(cmdLineArgs[0])) {
                synAn.parse();
            }

            // Let us hope we reach this point some day.
            // But keep the following in mind:
            // 1. The translation may be wrong because of bugs in the compiler.
            // 2. The source program is far from what the programmer wanted it to be.
            Report.info("Done.");
        } catch (Report.Error error) {
            // Print the description of the error.
            System.err.println(error.getMessage());
            System.exit(1);
        }
    }
}