Lexer.java
- /*
- * Copyright (c) 2015-2020, Stein Eldar Johnsen
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
- package net.morimekta.lexer;
- import java.io.IOException;
- import java.io.UncheckedIOException;
- import java.util.Iterator;
- import java.util.function.Predicate;
- import static net.morimekta.strings.EscapeUtil.javaEscape;
- /**
- * Base lexer class with helper methods that does not need to be
- * implemented. The base lexer should be able to continuously return
- * tokens until end of the stream, or the lexer process fails.
- *
- * @param <TT> The token-type enum class.
- * @param <T> The token implementation class.
- */
- public class Lexer<TT extends TokenType, T extends Token<TT>> implements Iterable<T> {
- /**
- * Create a lexer instance using a specific tokenizer.
- *
- * @param tokenizer The tokenizer to be used to get tokens.
- */
- protected Lexer(Tokenizer<TT, T> tokenizer) {
- this.tokenizer = tokenizer;
- }
- /**
- * Make a lexing / parsing failure exception.
- *
- * @param line The line for the failure.
- * @param lineNo The line no for the failure.
- * @param linePos The line pos for the failure.
- * @param message The message for the failure.
- * @param args Arguments to format message.
- * @return The failure exception.
- */
- protected LexerException eofFailure(CharSequence line, int lineNo, int linePos, String message, Object... args) {
- if (args.length > 0) {
- message = String.format(message, args);
- }
- return new LexerException(line, lineNo, linePos, 1, message);
- }
- /**
- * Make a lexing / parsing failure exception.
- *
- * @param token The token causing the failure.
- * @param message The message for the failure.
- * @param args Arguments for formatting message.
- * @return The failure exception.
- */
- public LexerException failure(T token, String message, Object... args) {
- if (args.length > 0) {
- message = String.format(message, args);
- }
- return new LexerException(token, message);
- }
- /**
- * Consume and return the next token. This should not
- * trigger parsing anything after this token.
- *
- * @return The next token, or null if it's end of the stream.
- * @throws LexerException If parsing token failed.
- * @throws IOException If reading failed.
- */
- public T next() throws LexerException, IOException {
- // --- PRIVATE ---
- T lastToken;
- if (nextToken != null) {
- lastToken = nextToken;
- nextToken = null;
- } else {
- lastToken = tokenizer.parseNextToken();
- }
- return lastToken;
- }
- /**
- * Return true if there is a 'next' token. If this method returns
- * true, then 'peek' must return non-null until otherwise modified, and
- * the next call to 'next' must return non-null.
- *
- * @return If there is a next token.
- * @throws LexerException If parsing token failed.
- * @throws IOException If reading failed.
- */
- public boolean hasNext() throws LexerException, IOException {
- return peek() != null;
- }
- /**
- * Return the token that will be returned by 'next', but do not
- * 'consume' it. If this method returns a non-null value, 'next'
- * must return the same value exactly once.
- *
- * @return The next token.
- * @throws LexerException If parsing token failed.
- * @throws IOException If reading failed.
- */
- public T peek() throws LexerException, IOException {
- if (nextToken == null) {
- nextToken = tokenizer.parseNextToken();
- }
- return nextToken;
- }
- /**
- * Peek the next token, and fail if the token is not present.
- *
- * @param what The exception message on failure.
- * @return The token to be the next.
- * @throws LexerException On parse errors.
- * @throws IOException If reading failed.
- */
- public T peek(String what) throws LexerException, IOException {
- T token = peek();
- if (token == null) {
- throw eofFailure(tokenizer.currentLine(),
- tokenizer.currentLineNo(),
- tokenizer.currentLinePos(),
- "Expected %s, but got end of file",
- what);
- }
- return token;
- }
- /**
- * Expect a new token, and fail there is no next token.
- *
- * @param what What is expected.
- * @return The next token.
- * @throws LexerException On parse errors or missing next token.
- * @throws IOException If reading failed.
- */
- public T expect(String what) throws LexerException, IOException {
- T next = next();
- if (next == null) {
- throw eofFailure(tokenizer.currentLine(),
- tokenizer.currentLineNo(),
- tokenizer.currentLinePos(),
- "Expected %s, but got end of file",
- what);
- }
- return next;
- }
- /**
- * Expect a new token, and fail if the token is not of the given token type.
- *
- * @param what The exception message on failure.
- * @param type The token type being expected.
- * @return The token to be the next.
- * @throws LexerException On parse errors or validation failures.
- * @throws IOException If reading failed.
- */
- public T expect(String what, TT type) throws LexerException, IOException {
- T token = expect(what);
- if (!token.type().equals(type)) {
- nextToken = token;
- throw failure(token, "Expected %s, but got '%s'", what, javaEscape(token));
- }
- return token;
- }
- /**
- * Expect a new token, and fail if the token does not validate.
- *
- * @param what The exception message on failure.
- * @param validator Validator to check on the token.
- * @return The token to be the next.
- * @throws LexerException On parse errors or validation failure.
- * @throws IOException If reading failed.
- */
- public T expect(String what, Predicate<T> validator) throws LexerException, IOException {
- T token = expect(what);
- if (!validator.test(token)) {
- nextToken = token;
- throw failure(token, "Expected %s, but got '%s'", what, javaEscape(token));
- }
- return token;
- }
- /**
- * @param what The exception message on failure.
- * @param symbols Symbols to be expected.
- * @return The token of the symbol.
- * @throws LexerException On parse errors or validation failure.
- * @throws IOException If unable to parse token, or not applicable symbol.
- */
- public T expectSymbol(String what, char... symbols) throws LexerException, IOException {
- return expect(what, t -> {
- if (t.length() != 1) {
- return false;
- }
- for (char s : symbols) {
- if (t.isSymbol(s)) {
- return true;
- }
- }
- return false;
- });
- }
- /**
- * Read until termination string.
- *
- * @param term The termination string.
- * @param type The type of token to be generated.
- * @param allowEof If we allow end of file to termainate the token.
- * @return The read token if it has any size.
- * @throws LexerException On parse errors or validation failure.
- * @throws IOException If unable to parse token.
- */
- public T readUntil(String term, TT type, boolean allowEof) throws LexerException, IOException {
- nextToken = null;
- return tokenizer.readUntil(term, type, allowEof);
- }
- @Override
- public Iterator<T> iterator() {
- return new LexerIterator();
- }
- // --- PRIVATE ---
- private final Tokenizer<TT, T> tokenizer;
- private T nextToken;
- private class LexerIterator implements Iterator<T> {
- @Override
- public boolean hasNext() {
- try {
- return Lexer.this.hasNext();
- } catch (LexerException e) {
- throw new UncheckedLexerException(e);
- } catch (IOException e) {
- throw new UncheckedIOException(e.getMessage(), e);
- }
- }
- @Override
- public T next() {
- try {
- return Lexer.this.expect("anything");
- } catch (LexerException e) {
- throw new UncheckedLexerException(e);
- } catch (IOException e) {
- throw new UncheckedIOException(e.getMessage(), e);
- }
- }
- }
- @Override
- public String toString() {
- return getClass().getSimpleName() + "{tokenizer=" + tokenizer + ", next=" + nextToken + "}";
- }
- }