// Tokenizer.java
/*
* Copyright (c) 2015-2020, Stein Eldar Johnsen
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package net.morimekta.lexer;
import java.io.IOException;
import java.util.List;
/**
 * Contract for a tokenizer: a component that parses content into tokens,
 * one at a time, while keeping track of its current position and of any
 * tokens that were skipped along the way.
 *
 * @param <TT> The generic type used to describe the type of a token.
 * @param <T>  The concrete token instance type.
 */
public interface Tokenizer<TT, T extends Token<TT>> {
    /**
     * Parse onwards from the current position and return the next token
     * that is found.
     *
     * @return The next token, or null if there is none.
     * @throws LexerException If parsing token failed.
     * @throws IOException    If reading failed.
     */
    T parseNextToken() throws LexerException, IOException;

    /**
     * Check whether any skipped tokens have been accumulated.
     *
     * @return True if the tokenizer has accumulated any skipped tokens
     *         since the last call to {@link #clearSkippedTokens()}.
     */
    boolean hasSkippedTokens();

    /**
     * Get the skipped tokens without clearing them.
     *
     * @return An immutable copy of the currently accumulated skipped tokens.
     */
    List<T> getSkippedTokens();

    /**
     * Hand out the accumulated skipped tokens, and clear them.
     *
     * @return An immutable copy of the skipped tokens as they were before
     *         clearing.
     */
    List<T> clearSkippedTokens();

    /**
     * Read everything from the current position up to the given terminator
     * string. The terminator itself should <b>not</b> be included in the
     * returned token. The returned sequence may span more than one line.
     *
     * @param terminator The terminator string.
     * @param type       The token type for the resulting token.
     * @param allowEof   Set to true if EOF is allowed as a replacement for
     *                   the terminator.
     * @return The token with the char sequence from the current position
     *         until the encountered terminator or the end of the file, or
     *         null if there were no chars before the terminator.
     * @throws LexerException On parse errors or validation failure.
     * @throws IOException    If the token could not be read.
     */
    T readUntil(CharSequence terminator, TT type, boolean allowEof) throws LexerException, IOException;

    /**
     * Get the current line number. Lines should be 1-indexed, so the first
     * line (everything before the first newline) is line no 1. The value
     * should refer to the position where parsing will continue; any previous
     * tokens should be represented by themselves.
     *
     * @return The current line number.
     */
    int currentLineNo();

    /**
     * Get the current line position. This is the char position within the
     * current line, and is 0-indexed. The value should refer to the position
     * where parsing will continue; any previous tokens should be represented
     * by themselves.
     *
     * @return The current line position.
     */
    int currentLinePos();

    /**
     * Get the content of the current line.
     *
     * @return Content of the current line. The char-sequence must be
     *         effectively immutable.
     */
    CharSequence currentLine();

    /**
     * Decide whether the specified token should be skipped, and collected as
     * a 'skipped' token instead. The default implementation never skips.
     *
     * @param token The token to check.
     * @return If the token should be skipped.
     */
    default boolean skipTokenOnParseNext(T token) {
        return false;
    }
}