WIP LexAn

This commit is contained in:
Gašper Dobrovoljc 2025-03-05 08:44:48 +01:00
commit d88951c225
No known key found for this signature in database
GPG Key ID: 0E7E037018CFA5A5
17 changed files with 645 additions and 0 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
out/

8
.idea/.gitignore generated vendored Normal file
View File

@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

View File

@ -0,0 +1,12 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredIdentifiers">
<list>
<option value="tests.smoke.test_absences_sliding.TestAbsencesSliding.*" />
</list>
</option>
</inspection_tool>
</profile>
</component>

6
.idea/misc.xml generated Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" languageLevel="JDK_23" default="true" project-jdk-name="homebrew-23" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>

8
.idea/modules.xml generated Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/pns.iml" filepath="$PROJECT_DIR$/pns.iml" />
</modules>
</component>
</project>

11
pns.iml Normal file
View File

@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

6
prg/Makefile Normal file
View File

@ -0,0 +1,6 @@
# Command used to launch the compiler; --enable-preview enables Java preview features.
JAVA = java --enable-preview
# NOTE(review): presumably meant to mark every goal as phony so the rule below
# always runs — verify that make honours a pattern in .PHONY.
.PHONY : %
# `make NAME` runs the lexical analyser (pins25.phase.LexAn) on NAME.pins25.
# NOTE(review): the sample program committed next to this file is test.pins,
# which this .pins25 pattern does not match — confirm the intended extension.
# NOTE(review): classes are looked up in ../bin while the IDE project writes
# to out/ — confirm the build output directory.
% : %.pins25
$(JAVA) -classpath ../bin pins25.phase.LexAn $<

2
prg/test.pins Normal file
View File

@ -0,0 +1,2 @@
'a'
'\n'

BIN
src/.DS_Store vendored Normal file

Binary file not shown.

7
src/module-info.java Normal file
View File

@ -0,0 +1,7 @@
/**
 * Implementation of the PINS'25 programming language.
 *
 * @author bostjan.slivnik@fri.uni-lj.si
 */
module pins25 {
}

BIN
src/pins25/.DS_Store vendored Normal file

Binary file not shown.

View File

@ -0,0 +1,156 @@
package pins25.common;
/**
 * Printing of notices, warnings and errors.
 * <p>
 * This is a static utility class; it cannot be instantiated.
 */
public class Report {

    @SuppressWarnings({ "doclint:missing" })
    private Report() {
        throw new InternalError();
    }

    /**
     * Description of a location in the source file.
     *
     * @param begLine   First line.
     * @param begColumn First column.
     * @param endLine   Last line.
     * @param endColumn Last column.
     */
    public record Location(int begLine, int begColumn, int endLine, int endColumn) implements Locatable {

        /**
         * Creates a new location that describes a single character of the source
         * file.
         *
         * @param line   Line of the character.
         * @param column Column of the character.
         */
        public Location(int line, int column) {
            this(line, column, line, column);
        }

        /**
         * Creates a new location spanning from the beginning of one location to the
         * end of another.
         *
         * @param beg The first location.
         * @param end The second location.
         */
        public Location(Locatable beg, Locatable end) {
            this(beg.location().begLine, beg.location().begColumn, end.location().endLine, end.location().endColumn);
        }

        /**
         * Formats the location as {@code [begLine.begColumn:endLine.endColumn]}.
         */
        @Override
        public String toString() {
            return "[" + (begLine + "." + begColumn) + ":" + (endLine + "." + endColumn) + "]";
        }

        @Override
        public Location location() {
            return this;
        }

    }

    /**
     * Interface to be implemented by classes whose objects represent parts of the
     * source file.
     */
    public interface Locatable {

        /**
         * Returns the location of the part of the source file described by the
         * object.
         *
         * @return Description of the location in the source file.
         */
        public Location location();

    }

    /**
     * Prints a general notice to standard output.
     *
     * @param message The notice.
     */
    public static void info(final String message) {
        System.out.println(":-) " + message);
    }

    /**
     * Prints a notice that relates to a part of the source file.
     *
     * @param location Description of the location in the source file.
     * @param message  The notice.
     */
    public static void info(final Locatable location, final String message) {
        // Print the position only: a Locatable's own toString() may include more
        // than its location (e.g. a token also prints its symbol and lexeme).
        System.out.println(":-) " + location.location() + " " + message);
    }

    /**
     * Prints a general warning to standard output.
     *
     * @param message The warning.
     */
    public static void warning(final String message) {
        System.out.println(":-o " + message);
    }

    /**
     * Prints a warning that relates to a part of the source file.
     *
     * @param location Description of the location in the source file.
     * @param message  The warning.
     */
    public static void warning(final Locatable location, final String message) {
        // See info(Locatable, String): only the position is printed.
        System.out.println(":-o " + location.location() + " " + message);
    }

    /**
     * Error.
     * <p>
     * An object of this class is thrown when the program has found an error in the
     * source file that makes it impossible to continue.
     */
    @SuppressWarnings("serial")
    public static class Error extends java.lang.Error {

        /**
         * Creates a new error.
         *
         * @param message Description of the error.
         */
        public Error(final String message) {
            super(":-( " + message);
        }

        /**
         * Creates a new error that relates to a part of the source file.
         *
         * @param location Description of the location in the source file.
         * @param message  Description of the error.
         */
        public Error(final Locatable location, final String message) {
            // Location.toString() already supplies the surrounding brackets, so none
            // are added here (adding them produced "[[1.1:1.1]]").
            super(":-( " + location.location() + " " + message);
        }

    }

    /**
     * Internal error.
     * <p>
     * An object of this class is thrown when the program detects an internal
     * error.
     */
    @SuppressWarnings("serial")
    public static class InternalError extends Error {

        /**
         * Creates a new internal error and prints its stack trace immediately.
         */
        public InternalError() {
            super("Internal error.");
            this.printStackTrace();
        }

    }

}

View File

@ -0,0 +1,98 @@
package pins25.common;
/**
 * Lexical symbol (token).
 *
 * @param location Location of the symbol in the source program.
 * @param symbol   Kind of the symbol.
 * @param lexeme   Textual representation of the symbol.
 */
public record Token(Report.Location location, Symbol symbol, String lexeme) implements Report.Locatable {

    /**
     * Kinds of lexical symbols.
     */
    public enum Symbol {
        /** End of file. */
        EOF,
        /** Number. */
        INTCONST,
        /** Character. */
        CHARCONST,
        /** String of characters. */
        STRINGCONST,
        /** Name. */
        IDENTIFIER,
        /** Keyword {@code fun}. */
        FUN,
        /** Keyword {@code var}. */
        VAR,
        /** Keyword {@code if}. */
        IF,
        /** Keyword {@code then}. */
        THEN,
        /** Keyword {@code else}. */
        ELSE,
        /** Keyword {@code while}. */
        WHILE,
        /** Keyword {@code do}. */
        DO,
        /** Keyword {@code let}. */
        LET,
        /** Keyword {@code in}. */
        IN,
        /** Keyword {@code end}. */
        END,
        /** Symbol {@code =}. */
        ASSIGN,
        /** Symbol {@code ,}. */
        COMMA,
        /** Symbol {@code &&}. */
        AND,
        /** Symbol {@code ||}. */
        OR,
        /** Symbol {@code !}. */
        NOT,
        /** Symbol {@code ==}. */
        EQU,
        /** Symbol {@code !=}. */
        NEQ,
        /** Symbol {@code >}. */
        GTH,
        /** Symbol {@code <}. */
        LTH,
        /** Symbol {@code >=}. */
        GEQ,
        /** Symbol {@code <=}. */
        LEQ,
        /** Symbol {@code +}. */
        ADD,
        /** Symbol {@code -}. */
        SUB,
        /** Symbol {@code *}. */
        MUL,
        /** Symbol {@code /}. */
        DIV,
        /** Symbol {@code %}. */
        MOD,
        /** Symbol {@code ^}. */
        PTR,
        /** Symbol {@code (}. */
        LPAREN,
        /** Symbol {@code )}. */
        RPAREN,
    }

    /**
     * Formats the token as its location, a space and the symbol kind; constants
     * and identifiers are followed by their lexeme in parentheses.
     */
    @Override
    public String toString() {
        final String suffix = switch (symbol) {
            case INTCONST, CHARCONST, STRINGCONST, IDENTIFIER -> "(" + lexeme + ")";
            default -> "";
        };
        return location + " " + symbol + suffix;
    }

}

View File

@ -0,0 +1,6 @@
/**
 * Code shared by several phases of the compiler.
 *
 * @author bostjan.slivnik@fri.uni-lj.si
 */
package pins25.common;

318
src/pins25/phase/LexAn.java Normal file
View File

@ -0,0 +1,318 @@
package pins25.phase;
import java.io.*;
import pins25.common.*;
/**
* Leksikalni analizator.
*/
public class LexAn implements AutoCloseable {
/**
* Izvorna datoteka.
*/
private final Reader srcFile;
/**
* Ustvari nov leksikalni analizator.
*
* @param srcFileName Ime izvorne datoteke.
*/
public LexAn(final String srcFileName) {
try {
srcFile = new BufferedReader(new InputStreamReader(new FileInputStream(new File(srcFileName))));
nextChar(); // Pripravi prvi znak izvorne datoteke (glej {@link nextChar}).
} catch (FileNotFoundException __) {
throw new Report.Error("Source file '" + srcFileName + "' not found.");
}
}
@Override
public void close() {
try {
srcFile.close();
} catch (IOException __) {
throw new Report.Error("Cannot close source file.");
}
}
/**
* Trenutni znak izvorne datoteke (glej {@link nextChar}).
*/
private int buffChar = -2;
/**
* Vrstica trenutnega znaka izvorne datoteke (glej {@link nextChar}).
*/
private int buffCharLine = 0;
/**
* Stolpec trenutnega znaka izvorne datoteke (glej {@link nextChar}).
*/
private int buffCharColumn = 0;
/**
* Prebere naslednji znak izvorne datoteke.
* <p>
* Izvorno datoteko beremo znak po znak. Trenutni znak izvorne datoteke je
* shranjen v spremenljivki {@link buffChar}, vrstica in stolpec trenutnega
* znaka izvorne datoteke sta shranjena v spremenljivkah {@link buffCharLine} in
* {@link buffCharColumn}.
* <p>
* Zacetne vrednosti {@link buffChar}, {@link buffCharLine} in
* {@link buffCharColumn} so {@code '\n'}, {@code 0} in {@code 0}: branje prvega
* znaka izvorne datoteke bo na osnovi vrednosti {@code '\n'} spremenljivke
* {@link buffChar} prvemu znaku izvorne datoteke priredilo vrstico 1 in stolpec
* 1.
* <p>
* Pri branju izvorne datoteke se predpostavlja, da je v spremenljivki
* {@link buffChar} ves "cas veljaven znak. Zunaj metode {@link nextChar} so vse
* spremenljivke {@link buffChar}, {@link buffCharLine} in
* {@link buffCharColumn} namenjene le branju.
* <p>
* Vrednost {@code -1} v spremenljivki {@link buffChar} pomeni konec datoteke
* (vrednosti spremenljivk {@link buffCharLine} in {@link buffCharColumn} pa
* nista ve"c veljavni).
*/
private void nextChar() {
try {
switch (buffChar) {
case -2: // Noben znak "se ni bil prebran.
buffChar = srcFile.read();
buffCharLine = buffChar == -1 ? 0 : 1;
buffCharColumn = buffChar == -1 ? 0 : 1;
return;
case -1: // Konec datoteke je bil ze viden.
return;
case '\n': // Prejsnji znak je koncal vrstico, zacne se nova vrstica.
buffChar = srcFile.read();
buffCharLine = buffChar == -1 ? buffCharLine : buffCharLine + 1;
buffCharColumn = buffChar == -1 ? buffCharColumn : 1;
return;
case '\t': // Prejsnji znak je tabulator, ta znak je morda potisnjen v desno.
buffChar = srcFile.read();
while (buffCharColumn % 8 != 0)
buffCharColumn += 1;
buffCharColumn += 1;
return;
default: // Prejsnji znak je brez posebnosti.
buffChar = srcFile.read();
buffCharColumn += 1;
}
} catch (IOException __) {
throw new Report.Error("Cannot read source file.");
}
}
private Report.Location currentLocation() {
return new Report.Location(buffCharLine, buffCharColumn);
}
/**
* Trenutni leksikalni simbol.
* <p>
* "Ce vrednost spremenljivke {@code buffToken} ni {@code null}, je simbol "ze
* prebran iz vhodne datoteke, ni pa "se predan naprej sintaksnemu analizatorju.
* Ta simbol je dostopen z metodama {@link peekToken} in {@link takeToken}.
*/
private Token buffToken = null;
/**
* Prebere naslednji leksikalni simbol, ki je nato dostopen preko metod
* {@link peekToken} in {@link takeToken}.
*/
private void nextToken() {
while (buffChar == '\n') {
nextChar();
}
switch (buffChar) {
case -1: // EOF
buffToken = new Token(currentLocation(), Token.Symbol.EOF, null);
return;
case '\'':
charConst();
return;
case '"':
stringConst();
return;
}
if (buffChar >= '0' && buffChar <= '9') {
intConst();
return;
}
throw new Report.Error(currentLocation(), "Unrecognized character '" + (char) buffChar + "'.");
}
private boolean isDigit() {
return buffChar >= '0' && buffChar <= '9';
}
private boolean isChar() {
return buffChar >= ' ' && buffChar <= '~';
}
private boolean isHex() {
return buffChar >= '0' && buffChar <= '9' || buffChar >= 'a' && buffChar <= 'f';
}
private void intConst() {
Report.Location startLocation = currentLocation();
Report.Location endLocation = currentLocation();
StringBuilder lexeme = new StringBuilder();
if (buffChar == '0') {
lexeme.append((char) buffChar);
nextChar();
if (isDigit()) {
throw new Report.Error(startLocation, "Leading zero is not allowed.");
}
} else {
while (isDigit()) {
lexeme.append((char) buffChar);
endLocation = currentLocation();
nextChar();
}
}
buffToken = new Token(
new Report.Location(startLocation, endLocation),
Token.Symbol.INTCONST,
lexeme.toString()
);
}
private void charConst() {
Report.Location startLocation = currentLocation();
StringBuilder lexeme = new StringBuilder();
lexeme.append((char) buffChar);
nextChar();
if (!isChar()) {
throw new Report.Error(startLocation, "Invalid character '" + (char) buffChar + "'.");
}
lexeme.append((char) buffChar);
if (buffChar == '\\') {
nextChar();
lexeme.append((char) buffChar);
if (buffChar == 'n' || buffChar == '\\' || buffChar == '\'') {
} else if (isHex()) {
nextChar();
lexeme.append((char) buffChar);
if (!isHex()) {
throw new Report.Error(currentLocation(), "Invalid ascii code '" + (char) buffChar + "'.");
}
} else {
throw new Report.Error(currentLocation(), "Invalid escaped character '" + (char) buffChar + "'.");
}
}
nextChar();
if (buffChar != '\'') {
throw new Report.Error(new Report.Location(startLocation, currentLocation()), "Unterminated character.");
}
lexeme.append((char) buffChar);
buffToken = new Token(
new Report.Location(startLocation, currentLocation()),
Token.Symbol.CHARCONST,
lexeme.toString()
);
nextChar();
}
private void stringConst() {
Report.Location startLocation = currentLocation();
StringBuilder lexeme = new StringBuilder();
lexeme.append((char) buffChar);
nextChar();
while (buffChar != '"') {
if (!isChar()) {
throw new Report.Error(currentLocation(), "Invalid character '" + (char) buffChar + "'.");
}
lexeme.append((char) buffChar);
nextChar();
}
lexeme.append((char) buffChar);
buffToken = new Token(
new Report.Location(startLocation, currentLocation()),
Token.Symbol.STRINGCONST,
lexeme.toString()
);
nextChar();
}
/**
* Vrne trenutni leksikalni simbol, ki ostane v lastnistvu leksikalnega
* analizatorja.
*
* @return Leksikalni simbol.
*/
public Token peekToken() {
if (buffToken == null)
nextToken();
return buffToken;
}
/**
* Vrne trenutni leksikalni simbol, ki preide v lastnistvo klicoce kode.
*
* @return Leksikalni simbol.
*/
public Token takeToken() {
if (buffToken == null)
nextToken();
final Token thisToken = buffToken;
buffToken = null;
return thisToken;
}
// --- ZAGON ---
/**
* Zagon leksikalnega analizatorja kot samostojnega programa.
*
* @param cmdLineArgs Argumenti v ukazni vrstici.
*/
public static void main(final String[] cmdLineArgs) {
System.out.println("This is PINS'25 compiler (lexical analysis):");
try {
if (cmdLineArgs.length == 0)
throw new Report.Error("No source file specified in the command line.");
if (cmdLineArgs.length > 1)
Report.warning("Unused arguments in the command line.");
try (LexAn lexAn = new LexAn(cmdLineArgs[0])) {
while (lexAn.peekToken().symbol() != Token.Symbol.EOF)
System.out.println(lexAn.takeToken());
System.out.println(lexAn.takeToken());
}
// Upajmo, da kdaj pridemo to te tocke.
// A zavedajmo se sledecega:
// 1. Prevod je zaradi napak v programu lahko napacen :-o
// 2. Izvorni program se zdalec ni tisto, kar je programer hotel, da bi bil ;-)
Report.info("Done.");
} catch (Report.Error error) {
// Izpis opisa napake.
System.err.println(error.getMessage());
System.exit(1);
}
}
}

View File

@ -0,0 +1,6 @@
/**
 * Individual phases of the compiler.
 *
 * @author bostjan.slivnik@fri.uni-lj.si
 */
package pins25.phase;