Lexer.java

  1. /*
  2.  * Copyright (c) 2015-2020, Stein Eldar Johnsen
  3.  *
  4.  * Licensed to the Apache Software Foundation (ASF) under one
  5.  * or more contributor license agreements. See the NOTICE file
  6.  * distributed with this work for additional information
  7.  * regarding copyright ownership. The ASF licenses this file
  8.  * to you under the Apache License, Version 2.0 (the
  9.  * "License"); you may not use this file except in compliance
  10.  * with the License. You may obtain a copy of the License at
  11.  *
  12.  *   http://www.apache.org/licenses/LICENSE-2.0
  13.  *
  14.  * Unless required by applicable law or agreed to in writing,
  15.  * software distributed under the License is distributed on an
  16.  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  17.  * KIND, either express or implied. See the License for the
  18.  * specific language governing permissions and limitations
  19.  * under the License.
  20.  */
  21. package net.morimekta.lexer;

  22. import java.io.IOException;
  23. import java.io.UncheckedIOException;
  24. import java.util.Iterator;
  25. import java.util.function.Predicate;

  26. import static net.morimekta.strings.EscapeUtil.javaEscape;

  27. /**
  28.  * Base lexer class with helper methods that does not need to be
  29.  * implemented. The base lexer should be able to continuously return
  30.  * tokens until end of the stream, or the lexer process fails.
  31.  *
  32.  * @param <TT> The token-type enum class.
  33.  * @param <T> The token implementation class.
  34.  */
  35. public class Lexer<TT extends TokenType, T extends Token<TT>> implements Iterable<T> {
  36.     /**
  37.      * Create a lexer instance using a specific tokenizer.
  38.      *
  39.      * @param tokenizer The tokenizer to be used to get tokens.
  40.      */
  41.     protected Lexer(Tokenizer<TT, T> tokenizer) {
  42.         this.tokenizer = tokenizer;
  43.     }

  44.     /**
  45.      * Make a lexing / parsing failure exception.
  46.      *
  47.      * @param line The line for the failure.
  48.      * @param lineNo The line no for the failure.
  49.      * @param linePos The line pos for the failure.
  50.      * @param message The message for the failure.
  51.      * @param args Arguments to format message.
  52.      * @return The failure exception.
  53.      */
  54.     protected LexerException eofFailure(CharSequence line, int lineNo, int linePos, String message, Object... args) {
  55.         if (args.length > 0) {
  56.             message = String.format(message, args);
  57.         }
  58.         return new LexerException(line, lineNo, linePos, 1, message);
  59.     }

  60.     /**
  61.      * Make a lexing / parsing failure exception.
  62.      *
  63.      * @param token The token causing the failure.
  64.      * @param message The message for the failure.
  65.      * @param args Arguments for formatting message.
  66.      * @return The failure exception.
  67.      */
  68.     public LexerException failure(T token, String message, Object... args) {
  69.         if (args.length > 0) {
  70.             message = String.format(message, args);
  71.         }
  72.         return new LexerException(token, message);
  73.     }

  74.     /**
  75.      * Consume and return the next token. This should not
  76.      * trigger parsing anything after this token.
  77.      *
  78.      * @return The next token, or null if it's end of the stream.
  79.      * @throws LexerException If parsing token failed.
  80.      * @throws IOException If reading failed.
  81.      */
  82.     public T next() throws LexerException, IOException {
  83.         // --- PRIVATE ---
  84.         T lastToken;
  85.         if (nextToken != null) {
  86.             lastToken = nextToken;
  87.             nextToken = null;
  88.         } else {
  89.             lastToken = tokenizer.parseNextToken();
  90.         }
  91.         return lastToken;
  92.     }

  93.     /**
  94.      * Return true if there is a 'next' token. If this method returns
  95.      * true, then 'peek' must return non-null until otherwise modified, and
  96.      * the next call to 'next' must return non-null.
  97.      *
  98.      * @return If there is a next token.
  99.      * @throws LexerException If parsing token failed.
  100.      * @throws IOException If reading failed.
  101.      */
  102.     public boolean hasNext() throws LexerException, IOException {
  103.         return peek() != null;
  104.     }

  105.     /**
  106.      * Return the token that will be returned by 'next', but do not
  107.      * 'consume' it. If this method returns a non-null value, 'next'
  108.      * must return the same value exactly once.
  109.      *
  110.      * @return The next token.
  111.      * @throws LexerException If parsing token failed.
  112.      * @throws IOException If reading failed.
  113.      */
  114.     public T peek() throws LexerException, IOException {
  115.         if (nextToken == null) {
  116.             nextToken = tokenizer.parseNextToken();
  117.         }
  118.         return nextToken;
  119.     }

  120.     /**
  121.      * Peek the next token, and fail if the token is not present.
  122.      *
  123.      * @param what The exception message on failure.
  124.      * @return The token to be the next.
  125.      * @throws LexerException On parse errors.
  126.      * @throws IOException If reading failed.
  127.      */
  128.     public T peek(String what) throws LexerException, IOException {
  129.         T token = peek();
  130.         if (token == null) {
  131.             throw eofFailure(tokenizer.currentLine(),
  132.                              tokenizer.currentLineNo(),
  133.                              tokenizer.currentLinePos(),
  134.                              "Expected %s, but got end of file",
  135.                              what);
  136.         }
  137.         return token;
  138.     }

  139.     /**
  140.      * Expect a new token, and fail there is no next token.
  141.      *
  142.      * @param what What is expected.
  143.      * @return The next token.
  144.      * @throws LexerException On parse errors or missing next token.
  145.      * @throws IOException If reading failed.
  146.      */
  147.     public T expect(String what) throws LexerException, IOException {
  148.         T next = next();
  149.         if (next == null) {
  150.             throw eofFailure(tokenizer.currentLine(),
  151.                              tokenizer.currentLineNo(),
  152.                              tokenizer.currentLinePos(),
  153.                              "Expected %s, but got end of file",
  154.                              what);
  155.         }
  156.         return next;
  157.     }

  158.     /**
  159.      * Expect a new token, and fail if the token is not of the given token type.
  160.      *
  161.      * @param what The exception message on failure.
  162.      * @param type The token type being expected.
  163.      * @return The token to be the next.
  164.      * @throws LexerException On parse errors or validation failures.
  165.      * @throws IOException If reading failed.
  166.      */
  167.     public T expect(String what, TT type) throws LexerException, IOException {
  168.         T token = expect(what);
  169.         if (!token.type().equals(type)) {
  170.             nextToken = token;
  171.             throw failure(token, "Expected %s, but got '%s'", what, javaEscape(token));
  172.         }
  173.         return token;
  174.     }

  175.     /**
  176.      * Expect a new token, and fail if the token does not validate.
  177.      *
  178.      * @param what The exception message on failure.
  179.      * @param validator Validator to check on the token.
  180.      * @return The token to be the next.
  181.      * @throws LexerException On parse errors or validation failure.
  182.      * @throws IOException If reading failed.
  183.      */
  184.     public T expect(String what, Predicate<T> validator) throws LexerException, IOException {
  185.         T token = expect(what);
  186.         if (!validator.test(token)) {
  187.             nextToken = token;
  188.             throw failure(token, "Expected %s, but got '%s'", what, javaEscape(token));
  189.         }
  190.         return token;
  191.     }

  192.     /**
  193.      * @param what The exception message on failure.
  194.      * @param symbols Symbols to be expected.
  195.      * @return The token of the symbol.
  196.      * @throws LexerException On parse errors or validation failure.
  197.      * @throws IOException If unable to parse token, or not applicable symbol.
  198.      */
  199.     public T expectSymbol(String what, char... symbols) throws LexerException, IOException {
  200.         return expect(what, t -> {
  201.             if (t.length() != 1) {
  202.                 return false;
  203.             }
  204.             for (char s : symbols) {
  205.                 if (t.isSymbol(s)) {
  206.                     return true;
  207.                 }
  208.             }
  209.             return false;
  210.         });
  211.     }

  212.     /**
  213.      * Read until termination string.
  214.      *
  215.      * @param term The termination string.
  216.      * @param type The type of token to be generated.
  217.      * @param allowEof If we allow end of file to termainate the token.
  218.      * @return The read token if it has any size.
  219.      * @throws LexerException On parse errors or validation failure.
  220.      * @throws IOException If unable to parse token.
  221.      */
  222.     public T readUntil(String term, TT type, boolean allowEof) throws LexerException, IOException {
  223.         nextToken = null;
  224.         return tokenizer.readUntil(term, type, allowEof);
  225.     }

  226.     @Override
  227.     public Iterator<T> iterator() {
  228.         return new LexerIterator();
  229.     }

  230.     // --- PRIVATE ---

  231.     private final Tokenizer<TT, T> tokenizer;
  232.     private T nextToken;

  233.     private class LexerIterator implements Iterator<T> {
  234.         @Override
  235.         public boolean hasNext() {
  236.             try {
  237.                 return Lexer.this.hasNext();
  238.             } catch (LexerException e) {
  239.                 throw new UncheckedLexerException(e);
  240.             } catch (IOException e) {
  241.                 throw new UncheckedIOException(e.getMessage(), e);
  242.             }
  243.         }

  244.         @Override
  245.         public T next() {
  246.             try {
  247.                 return Lexer.this.expect("anything");
  248.             } catch (LexerException e) {
  249.                 throw new UncheckedLexerException(e);
  250.             } catch (IOException e) {
  251.                 throw new UncheckedIOException(e.getMessage(), e);
  252.             }
  253.         }
  254.     }

  255.     @Override
  256.     public String toString() {
  257.         return getClass().getSimpleName() + "{tokenizer=" + tokenizer + ", next=" + nextToken + "}";
  258.     }
  259. }