commit d88951c22533fb8b2d5199b08e527806c1946711 Author: Gašper Dobrovoljc Date: Wed Mar 5 08:44:48 2025 +0100 WIP LexAn diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..f45dcfc Binary files /dev/null and b/.DS_Store differ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..89f9ac0 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +out/ diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..581c216 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,12 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..eeb80f7 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..f3d9431 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/pns.iml b/pns.iml new file mode 100644 index 0000000..c90834f --- /dev/null +++ b/pns.iml @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/prg/Makefile b/prg/Makefile new file mode 100644 index 0000000..b8ef6b2 --- /dev/null +++ b/prg/Makefile @@ -0,0 +1,6 @@ +JAVA = java --enable-preview + +.PHONY : % +% : %.pins25 + $(JAVA) -classpath ../bin pins25.phase.LexAn $< + diff --git a/prg/test.pins b/prg/test.pins new file mode 100644 index 0000000..c8dcdaf --- /dev/null +++ b/prg/test.pins @@ -0,0 +1,2 @@ +'a' +'\n' \ No newline at end of file diff --git a/src/.DS_Store b/src/.DS_Store new file mode 
100644 index 0000000..0108779 Binary files /dev/null and b/src/.DS_Store differ diff --git a/src/module-info.java b/src/module-info.java new file mode 100644 index 0000000..efc6d9f --- /dev/null +++ b/src/module-info.java @@ -0,0 +1,7 @@ +/** + * Implementacija programskega jezika PINS'25. + * + * @author bostjan.slivnik@fri.uni-lj.si + */ +module pins25 { +} \ No newline at end of file diff --git a/src/pins25/.DS_Store b/src/pins25/.DS_Store new file mode 100644 index 0000000..d63fd5a Binary files /dev/null and b/src/pins25/.DS_Store differ diff --git a/src/pins25/common/Report.java b/src/pins25/common/Report.java new file mode 100644 index 0000000..4bbfff7 --- /dev/null +++ b/src/pins25/common/Report.java @@ -0,0 +1,156 @@ +package pins25.common; + +/** + * Izpis obvestil, opozoril in napak. + */ +public class Report { + + @SuppressWarnings({ "doclint:missing" }) + private Report() { + throw new InternalError(); + } + + /** + * Opis lokacije v izvorni datoteki. + * + * @param begLine Zacetna vrstica. + * @param begColumn Zacetni stolpec. + * @param endLine Koncna vrstica. + * @param endColumn Koncni stolpec. + */ + public record Location(int begLine, int begColumn, int endLine, int endColumn) implements Locatable { + + /** + * Ustvari novo lokacijo, ki opisuje en sam znak izvorne datoteke. + * + * @param line Vrstica znaka. + * @param column Stolpec znaka. + */ + public Location(int line, int column) { + this(line, column, line, column); + } + + /** + * Ustvari novo lokacijo, ki se razteza od ene do druge lokacije. + * + * @param beg Prva lokacija. + * @param end Druga lokacija. + */ + public Location(Locatable beg, Locatable end) { + this(beg.location().begLine, beg.location().begColumn, end.location().endLine, end.location().endColumn); + } + + @Override + public String toString() { + return "[" + (begLine + "." + begColumn) + ":" + (endLine + "." 
+ endColumn) + "]"; + } + + @Override + public Location location() { + return this; + } + + } + + /** + * Vmesnik, ki naj ga implementirajo razredi, katerih objekti predstavljajo dele + * izvorne datoteke. + */ + public interface Locatable { + + /** + * Vrne lokacijo dela izvorne datoteke, ki ga opisuje objekt. + * + * @return Opis lokacije v izvorni datoteki. + */ + public Location location(); + + } + + /** + * Izpis splosnega obvestila. + * + * @param message Obvestilo. + */ + public static void info(final String message) { + System.out.println(":-) " + message); + } + + /** + * Izpis obvestila, ki je vezano na del izvorne datoteke. + * + * @param location Opis lokacije v izvorni datoteki. + * @param message Obvestilo. + */ + public static void info(final Locatable location, final String message) { + System.out.println(":-) " + location + " " + message); + } + + /** + * Izpis splosnega opozorila. + * + * @param message Opozorilo. + */ + public static void warning(final String message) { + System.out.println(":-o " + message); + } + + /** + * Izpis opozorila, ki je vezano na del izvorne datoteke. + * + * @param location Opis lokacije v izvorni datoteki. + * @param message Opozorilo. + */ + public static void warning(final Locatable location, final String message) { + System.out.println(":-o " + location + " " + message); + } + + /** + * Napaka. + * + * Objekt tega razreda se vrze v primeru, ko je program odkril napako v izvorni + * datoteki, zaradi katere ni vec mozno nadaljevati z izvajanjem. + */ + @SuppressWarnings("serial") + public static class Error extends java.lang.Error { + + /** + * Ustvari novo napako. + * + * @param message Opis napake. + */ + public Error(final String message) { + super(":-( " + message); + } + + /** + * Ustvari novo napako, ki je veznana na del izvorne datoteke. + * + * @param location Opis lokacije v izvorni datoteki. + * @param message Opis napake. 
+ */ + public Error(final Locatable location, final String message) { + super(":-( " + "[" + location.location() + "] " + message); + } + + } + + /** + * Notranja napaka. + * + * Objekt tega razreda se vze v primeru, ko program zazna notranjo napako. + */ + @SuppressWarnings("serial") + public static class InternalError extends Error { + + /** + * Ustvari novo notranjo napako. + */ + public InternalError() { + super("Internal error."); + this.printStackTrace(); + } + + } + +} \ No newline at end of file diff --git a/src/pins25/common/Token.java b/src/pins25/common/Token.java new file mode 100644 index 0000000..8735449 --- /dev/null +++ b/src/pins25/common/Token.java @@ -0,0 +1,98 @@ +package pins25.common; + +/** + * Leksikalni simbol. + * + * @param location Lokacija simbola v izvornem programu. + * @param symbol Vrsta simbola. + * @param lexeme Znakovna predstavitev simbola. + */ +public record Token(Report.Location location, Symbol symbol, String lexeme) implements Report.Locatable { + + /** + * Vrste leksikalnih simbolov. + */ + public enum Symbol { + /** Konec datoteke. */ + EOF, + /** Stevilo. */ + INTCONST, + /** Znak. */ + CHARCONST, + /** Niz znakov. */ + STRINGCONST, + /** Ime. */ + IDENTIFIER, + /** Kljucna beseda {@code fun}. */ + FUN, + /** Kljucna beseda {@code var}. */ + VAR, + /** Kljucna beseda {@code if}. */ + IF, + /** Kljucna beseda {@code then}. */ + THEN, + /** Kljucna beseda {@code else}. */ + ELSE, + /** Kljucna beseda {@code while}. */ + WHILE, + /** Kljucna beseda {@code do}. */ + DO, + /** Kljucna beseda {@code let}. */ + LET, + /** Kljucna beseda {@code in}. */ + IN, + /** Kljucna beseda {@code end}. */ + END, + /** Simbol {@code =}. */ + ASSIGN, + /** Simbol {@code ,}. */ + COMMA, + /** Simbol {@code &&}. */ + AND, + /** Simbol {@code ||}. */ + OR, + /** Simbol {@code !}. */ + NOT, + /** Simbol {@code ==}. */ + EQU, + /** Simbol {@code !=}. */ + NEQ, + /** Simbol {@code >}. */ + GTH, + /** Simbol {@code <}. 
*/ + LTH, + /** Simbol {@code >=}. */ + GEQ, + /** Simbol {@code <=}. */ + LEQ, + /** Simbol {@code +}. */ + ADD, + /** Simbol {@code -}. */ + SUB, + /** Simbol {@code *}. */ + MUL, + /** Simbol {@code /}. */ + DIV, + /** Simbol {@code %}. */ + MOD, + /** Simbol {@code ^}. */ + PTR, + /** Simbol {@code (}. */ + LPAREN, + /** Simbol {@code )}. */ + RPAREN, + } + + @Override + public String toString() { + String lexeme = switch (symbol) { + case INTCONST -> "(" + this.lexeme + ")"; + case CHARCONST -> "(" + this.lexeme + ")"; + case STRINGCONST -> "(" + this.lexeme + ")"; + case IDENTIFIER -> "(" + this.lexeme + ")"; + default -> ""; + }; + return location + " " + symbol + lexeme; + } + +} \ No newline at end of file diff --git a/src/pins25/common/package-info.java b/src/pins25/common/package-info.java new file mode 100644 index 0000000..9cc81fd --- /dev/null +++ b/src/pins25/common/package-info.java @@ -0,0 +1,6 @@ +/** + * Koda, ki je skupna vecim fazam prevajalnika. + * + * @author bostjan.slivnik@fri.uni-lj.si + */ +package pins25.common; \ No newline at end of file diff --git a/src/pins25/phase/LexAn.java b/src/pins25/phase/LexAn.java new file mode 100644 index 0000000..297e320 --- /dev/null +++ b/src/pins25/phase/LexAn.java @@ -0,0 +1,318 @@ +package pins25.phase; + +import java.io.*; + +import pins25.common.*; + +/** + * Leksikalni analizator. + */ +public class LexAn implements AutoCloseable { + + /** + * Izvorna datoteka. + */ + private final Reader srcFile; + + /** + * Ustvari nov leksikalni analizator. + * + * @param srcFileName Ime izvorne datoteke. + */ + public LexAn(final String srcFileName) { + try { + srcFile = new BufferedReader(new InputStreamReader(new FileInputStream(new File(srcFileName)))); + nextChar(); // Pripravi prvi znak izvorne datoteke (glej {@link nextChar}). 
+ } catch (FileNotFoundException __) { + throw new Report.Error("Source file '" + srcFileName + "' not found."); + } + } + + @Override + public void close() { + try { + srcFile.close(); + } catch (IOException __) { + throw new Report.Error("Cannot close source file."); + } + } + + /** + * Trenutni znak izvorne datoteke (glej {@link nextChar}). + */ + private int buffChar = -2; + + /** + * Vrstica trenutnega znaka izvorne datoteke (glej {@link nextChar}). + */ + private int buffCharLine = 0; + + /** + * Stolpec trenutnega znaka izvorne datoteke (glej {@link nextChar}). + */ + private int buffCharColumn = 0; + + /** + * Prebere naslednji znak izvorne datoteke. + *

+ * Izvorno datoteko beremo znak po znak. Trenutni znak izvorne datoteke je + * shranjen v spremenljivki {@link buffChar}, vrstica in stolpec trenutnega + * znaka izvorne datoteke sta shranjena v spremenljivkah {@link buffCharLine} in + * {@link buffCharColumn}. + *

+ * Zacetne vrednosti {@link buffChar}, {@link buffCharLine} in + * {@link buffCharColumn} so {@code -2}, {@code 0} in {@code 0}: branje prvega + * znaka izvorne datoteke bo na osnovi vrednosti {@code -2} spremenljivke + * {@link buffChar} prvemu znaku izvorne datoteke priredilo vrstico 1 in stolpec + * 1 (oziroma vrstico 0 in stolpec 0, ce je izvorna datoteka prazna). + *

+ * Pri branju izvorne datoteke se predpostavlja, da je v spremenljivki + * {@link buffChar} ves "cas veljaven znak. Zunaj metode {@link nextChar} so vse + * spremenljivke {@link buffChar}, {@link buffCharLine} in + * {@link buffCharColumn} namenjene le branju. + *

+ * Vrednost {@code -1} v spremenljivki {@link buffChar} pomeni konec datoteke + * (vrednosti spremenljivk {@link buffCharLine} in {@link buffCharColumn} pa + * nista ve"c veljavni). + */ + private void nextChar() { + try { + switch (buffChar) { + case -2: // Noben znak "se ni bil prebran. + buffChar = srcFile.read(); + buffCharLine = buffChar == -1 ? 0 : 1; + buffCharColumn = buffChar == -1 ? 0 : 1; + return; + case -1: // Konec datoteke je bil ze viden. + return; + case '\n': // Prejsnji znak je koncal vrstico, zacne se nova vrstica. + buffChar = srcFile.read(); + buffCharLine = buffChar == -1 ? buffCharLine : buffCharLine + 1; + buffCharColumn = buffChar == -1 ? buffCharColumn : 1; + return; + case '\t': // Prejsnji znak je tabulator, ta znak je morda potisnjen v desno. + buffChar = srcFile.read(); + while (buffCharColumn % 8 != 0) + buffCharColumn += 1; + buffCharColumn += 1; + return; + default: // Prejsnji znak je brez posebnosti. + buffChar = srcFile.read(); + buffCharColumn += 1; + } + } catch (IOException __) { + throw new Report.Error("Cannot read source file."); + } + } + + private Report.Location currentLocation() { + return new Report.Location(buffCharLine, buffCharColumn); + } + + /** + * Trenutni leksikalni simbol. + *

+ * "Ce vrednost spremenljivke {@code buffToken} ni {@code null}, je simbol "ze + * prebran iz vhodne datoteke, ni pa "se predan naprej sintaksnemu analizatorju. + * Ta simbol je dostopen z metodama {@link peekToken} in {@link takeToken}. + */ + private Token buffToken = null; + + /** + * Prebere naslednji leksikalni simbol, ki je nato dostopen preko metod + * {@link peekToken} in {@link takeToken}. + */ + private void nextToken() { + while (buffChar == '\n') { + nextChar(); + } + + switch (buffChar) { + case -1: // EOF + buffToken = new Token(currentLocation(), Token.Symbol.EOF, null); + return; + + case '\'': + charConst(); + return; + + case '"': + stringConst(); + return; + } + + if (buffChar >= '0' && buffChar <= '9') { + intConst(); + return; + } + + throw new Report.Error(currentLocation(), "Unrecognized character '" + (char) buffChar + "'."); + } + + private boolean isDigit() { + return buffChar >= '0' && buffChar <= '9'; + } + + private boolean isChar() { + return buffChar >= ' ' && buffChar <= '~'; + } + + private boolean isHex() { + return buffChar >= '0' && buffChar <= '9' || buffChar >= 'a' && buffChar <= 'f'; + } + + private void intConst() { + Report.Location startLocation = currentLocation(); + Report.Location endLocation = currentLocation(); + StringBuilder lexeme = new StringBuilder(); + + if (buffChar == '0') { + lexeme.append((char) buffChar); + nextChar(); + if (isDigit()) { + throw new Report.Error(startLocation, "Leading zero is not allowed."); + } + } else { + while (isDigit()) { + lexeme.append((char) buffChar); + endLocation = currentLocation(); + nextChar(); + } + } + + buffToken = new Token( + new Report.Location(startLocation, endLocation), + Token.Symbol.INTCONST, + lexeme.toString() + ); + } + + private void charConst() { + Report.Location startLocation = currentLocation(); + StringBuilder lexeme = new StringBuilder(); + + lexeme.append((char) buffChar); + nextChar(); + + if (!isChar()) { + throw new Report.Error(startLocation, 
"Invalid character '" + (char) buffChar + "'."); + } + + lexeme.append((char) buffChar); + if (buffChar == '\\') { + nextChar(); + lexeme.append((char) buffChar); + if (buffChar == 'n' || buffChar == '\\' || buffChar == '\'') { + + } else if (isHex()) { + nextChar(); + lexeme.append((char) buffChar); + if (!isHex()) { + throw new Report.Error(currentLocation(), "Invalid ascii code '" + (char) buffChar + "'."); + } + } else { + throw new Report.Error(currentLocation(), "Invalid escaped character '" + (char) buffChar + "'."); + } + } + + nextChar(); + if (buffChar != '\'') { + throw new Report.Error(new Report.Location(startLocation, currentLocation()), "Unterminated character."); + } + lexeme.append((char) buffChar); + + buffToken = new Token( + new Report.Location(startLocation, currentLocation()), + Token.Symbol.CHARCONST, + lexeme.toString() + ); + + nextChar(); + } + + private void stringConst() { + Report.Location startLocation = currentLocation(); + StringBuilder lexeme = new StringBuilder(); + + lexeme.append((char) buffChar); + nextChar(); + + while (buffChar != '"') { + if (!isChar()) { + throw new Report.Error(currentLocation(), "Invalid character '" + (char) buffChar + "'."); + } + + lexeme.append((char) buffChar); + nextChar(); + } + + lexeme.append((char) buffChar); + buffToken = new Token( + new Report.Location(startLocation, currentLocation()), + Token.Symbol.STRINGCONST, + lexeme.toString() + ); + + nextChar(); + } + + /** + * Vrne trenutni leksikalni simbol, ki ostane v lastnistvu leksikalnega + * analizatorja. + * + * @return Leksikalni simbol. + */ + public Token peekToken() { + if (buffToken == null) + nextToken(); + return buffToken; + } + + /** + * Vrne trenutni leksikalni simbol, ki preide v lastnistvo klicoce kode. + * + * @return Leksikalni simbol. 
+ */ + public Token takeToken() { + if (buffToken == null) + nextToken(); + final Token thisToken = buffToken; + buffToken = null; + return thisToken; + } + +// --- ZAGON --- + + /** + * Zagon leksikalnega analizatorja kot samostojnega programa. + * + * @param cmdLineArgs Argumenti v ukazni vrstici. + */ + public static void main(final String[] cmdLineArgs) { + System.out.println("This is PINS'25 compiler (lexical analysis):"); + + try { + if (cmdLineArgs.length == 0) + throw new Report.Error("No source file specified in the command line."); + if (cmdLineArgs.length > 1) + Report.warning("Unused arguments in the command line."); + + try (LexAn lexAn = new LexAn(cmdLineArgs[0])) { + while (lexAn.peekToken().symbol() != Token.Symbol.EOF) + System.out.println(lexAn.takeToken()); + System.out.println(lexAn.takeToken()); + } + + // Upajmo, da kdaj pridemo to te tocke. + // A zavedajmo se sledecega: + // 1. Prevod je zaradi napak v programu lahko napacen :-o + // 2. Izvorni program se zdalec ni tisto, kar je programer hotel, da bi bil ;-) + Report.info("Done."); + } catch (Report.Error error) { + // Izpis opisa napake. + System.err.println(error.getMessage()); + System.exit(1); + } + } + +} \ No newline at end of file diff --git a/src/pins25/phase/package-info.java b/src/pins25/phase/package-info.java new file mode 100644 index 0000000..980a9ad --- /dev/null +++ b/src/pins25/phase/package-info.java @@ -0,0 +1,6 @@ +/** + * Posamezne faze prevajalnika. + * + * @author bostjan.slivnik@fri.uni-lj.si + */ +package pins25.phase; \ No newline at end of file