// Lexer.java
/*
* Copyright (c) 2015-2020, Stein Eldar Johnsen
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package net.morimekta.lexer;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Iterator;
import java.util.function.Predicate;
import static net.morimekta.strings.EscapeUtil.javaEscape;
/**
* Base lexer class with helper methods that do not need to be
* implemented. The base lexer should be able to continuously return
* tokens until the end of the stream, or the lexer process fails.
*
* @param <TT> The token-type enum class.
* @param <T> The token implementation class.
*/
public class Lexer<TT extends TokenType, T extends Token<TT>> implements Iterable<T> {
    /**
     * Create a lexer instance using a specific tokenizer.
     *
     * @param tokenizer The tokenizer to be used to get tokens.
     */
    protected Lexer(Tokenizer<TT, T> tokenizer) {
        this.tokenizer = tokenizer;
    }

    /**
     * Make a lexing / parsing failure exception for an unexpected end
     * of file. The resulting exception always covers a single character
     * at the given position.
     *
     * @param line The line for the failure.
     * @param lineNo The line no for the failure.
     * @param linePos The line pos for the failure.
     * @param message The message for the failure.
     * @param args Arguments to format message. If empty, the message is
     *             used verbatim (so literal '%' chars are safe).
     * @return The failure exception.
     */
    protected LexerException eofFailure(CharSequence line, int lineNo, int linePos, String message, Object... args) {
        if (args.length > 0) {
            message = String.format(message, args);
        }
        return new LexerException(line, lineNo, linePos, 1, message);
    }

    /**
     * Make a lexing / parsing failure exception.
     *
     * @param token The token causing the failure.
     * @param message The message for the failure.
     * @param args Arguments for formatting message. If empty, the
     *             message is used verbatim.
     * @return The failure exception.
     */
    public LexerException failure(T token, String message, Object... args) {
        if (args.length > 0) {
            message = String.format(message, args);
        }
        return new LexerException(token, message);
    }

    /**
     * Consume and return the next token. This should not
     * trigger parsing anything after this token.
     *
     * @return The next token, or null if it's end of the stream.
     * @throws LexerException If parsing token failed.
     * @throws IOException If reading failed.
     */
    public T next() throws LexerException, IOException {
        T lastToken;
        if (nextToken != null) {
            // A token was buffered by peek(); hand it out exactly once.
            lastToken = nextToken;
            nextToken = null;
        } else {
            lastToken = tokenizer.parseNextToken();
        }
        return lastToken;
    }

    /**
     * Return true if there is a 'next' token. If this method returns
     * true, then 'peek' must return non-null until otherwise modified, and
     * the next call to 'next' must return non-null.
     *
     * @return If there is a next token.
     * @throws LexerException If parsing token failed.
     * @throws IOException If reading failed.
     */
    public boolean hasNext() throws LexerException, IOException {
        return peek() != null;
    }

    /**
     * Return the token that will be returned by 'next', but do not
     * 'consume' it. If this method returns a non-null value, 'next'
     * must return the same value exactly once.
     *
     * @return The next token, or null at end of the stream.
     * @throws LexerException If parsing token failed.
     * @throws IOException If reading failed.
     */
    public T peek() throws LexerException, IOException {
        if (nextToken == null) {
            nextToken = tokenizer.parseNextToken();
        }
        return nextToken;
    }

    /**
     * Peek the next token, and fail if the token is not present.
     *
     * @param what The exception message on failure.
     * @return The token to be the next.
     * @throws LexerException On parse errors.
     * @throws IOException If reading failed.
     */
    public T peek(String what) throws LexerException, IOException {
        T token = peek();
        if (token == null) {
            throw eofMissing(what);
        }
        return token;
    }

    /**
     * Expect a new token, and fail if there is no next token.
     *
     * @param what What is expected.
     * @return The next token.
     * @throws LexerException On parse errors or missing next token.
     * @throws IOException If reading failed.
     */
    public T expect(String what) throws LexerException, IOException {
        T next = next();
        if (next == null) {
            throw eofMissing(what);
        }
        return next;
    }

    /**
     * Expect a new token, and fail if the token is not of the given token type.
     *
     * @param what The exception message on failure.
     * @param type The token type being expected.
     * @return The token to be the next.
     * @throws LexerException On parse errors or validation failures.
     * @throws IOException If reading failed.
     */
    public T expect(String what, TT type) throws LexerException, IOException {
        T token = expect(what);
        if (!token.type().equals(type)) {
            // Push the token back so the caller can recover and re-read it.
            nextToken = token;
            throw failure(token, "Expected %s, but got '%s'", what, javaEscape(token));
        }
        return token;
    }

    /**
     * Expect a new token, and fail if the token does not validate.
     *
     * @param what The exception message on failure.
     * @param validator Validator to check on the token.
     * @return The token to be the next.
     * @throws LexerException On parse errors or validation failure.
     * @throws IOException If reading failed.
     */
    public T expect(String what, Predicate<T> validator) throws LexerException, IOException {
        T token = expect(what);
        if (!validator.test(token)) {
            // Push the token back so the caller can recover and re-read it.
            nextToken = token;
            throw failure(token, "Expected %s, but got '%s'", what, javaEscape(token));
        }
        return token;
    }

    /**
     * Expect a single-character symbol token matching one of the given
     * symbols.
     *
     * @param what The exception message on failure.
     * @param symbols Symbols to be expected.
     * @return The token of the symbol.
     * @throws LexerException On parse errors or validation failure.
     * @throws IOException If unable to parse token, or not applicable symbol.
     */
    public T expectSymbol(String what, char... symbols) throws LexerException, IOException {
        return expect(what, t -> {
            if (t.length() != 1) {
                return false;
            }
            for (char s : symbols) {
                if (t.isSymbol(s)) {
                    return true;
                }
            }
            return false;
        });
    }

    /**
     * Read until termination string.
     *
     * @param term The termination string.
     * @param type The type of token to be generated.
     * @param allowEof If we allow end of file to terminate the token.
     * @return The read token if it has any size.
     * @throws LexerException On parse errors or validation failure.
     * @throws IOException If unable to parse token.
     */
    public T readUntil(String term, TT type, boolean allowEof) throws LexerException, IOException {
        // Any buffered token is part of the raw content being read, so
        // discard the peek buffer before delegating to the tokenizer.
        nextToken = null;
        return tokenizer.readUntil(term, type, allowEof);
    }

    @Override
    public Iterator<T> iterator() {
        return new LexerIterator();
    }

    // --- PRIVATE ---

    private final Tokenizer<TT, T> tokenizer;
    private T nextToken;

    /**
     * Build the end-of-file failure used when an expected token is
     * missing, positioned at the tokenizer's current location.
     *
     * @param what Description of what was expected.
     * @return The failure exception.
     */
    private LexerException eofMissing(String what) {
        return eofFailure(tokenizer.currentLine(),
                          tokenizer.currentLineNo(),
                          tokenizer.currentLinePos(),
                          "Expected %s, but got end of file",
                          what);
    }

    /**
     * Iterator adapter over the lexer. Wraps the checked exceptions of
     * the Lexer methods in their unchecked counterparts, as required by
     * the {@link Iterator} interface.
     */
    private class LexerIterator implements Iterator<T> {
        @Override
        public boolean hasNext() {
            try {
                return Lexer.this.hasNext();
            } catch (LexerException e) {
                throw new UncheckedLexerException(e);
            } catch (IOException e) {
                throw new UncheckedIOException(e.getMessage(), e);
            }
        }

        @Override
        public T next() {
            try {
                return Lexer.this.expect("anything");
            } catch (LexerException e) {
                throw new UncheckedLexerException(e);
            } catch (IOException e) {
                throw new UncheckedIOException(e.getMessage(), e);
            }
        }
    }

    @Override
    public String toString() {
        return getClass().getSimpleName() + "{tokenizer=" + tokenizer + ", next=" + nextToken + "}";
    }
}