Token.java

package net.morimekta.lexer;

import net.morimekta.strings.chr.CharSlice;

import java.util.Arrays;
import java.util.Objects;

import static java.util.Objects.requireNonNull;
import static net.morimekta.strings.EscapeUtil.javaUnEscape;

/**
 * A token parsed out of a tokenizer.
 * <p>
 * A token is a {@link CharSlice} over the tokenizer's char buffer, annotated
 * with a token type and the line number / line position where it starts.
 *
 * @param <Type> The token type represented by the token.
 */
public class Token<Type> extends CharSlice {
    /**
     * Create a token instance. The token's own state is immutable, but this
     * does not imply that the content of the wrapped buffer is immutable.
     *
     * @param fb  The buffer to wrap.
     * @param off The start offset to wrap.
     * @param len The length to represent.
     * @param type The token type represented.
     * @param lineNo The current line number, starting at 1.
     * @param linePos The current line position, starting at 1.
     * @throws NullPointerException If type is null.
     */
    public Token(char[] fb, int off, int len, Type type, int lineNo, int linePos) {
        super(fb, off, len);
        this.type = requireNonNull(type, "type == null");
        this.lineNo = lineNo;
        this.linePos = linePos;
    }

    /**
     * @return The token type.
     */
    public Type type() {
        return type;
    }

    /**
     * Get the content of the line the token is on, from the start of the line
     * up to (not including) the first line break or NUL char found at or after
     * the last char of the token. The result is computed on first call and
     * cached.
     * <p>
     * If the start of the line is not available in the buffer (the line
     * position points to before the buffer start), the missing prefix is
     * replaced by padding of the same length, so that {@link #linePos()} still
     * points at the token within the returned line.
     *
     * @return The line where the token is.
     */
    public CharSequence line() {
        if (line == null) {
            int start = off + 1 - linePos;
            // Scan from the last char of the token. For a zero-length token
            // 'off + len - 1' may be before the line start (or before the
            // buffer), so never start scanning before either of those.
            int end = Math.max(off + len - 1, Math.max(start, 0));
            while (end < fb.length && fb[end] != '\n' && fb[end] != '\r' && fb[end] > 0) {
                ++end;
            }
            if (start < 0) {
                // Pad start of line to make it the correct length: exactly
                // '-start' chars in total. Longer gaps are shown as middle
                // dots followed by three spaces. The dot is written as a
                // unicode escape so it is always a single char regardless of
                // the encoding the source file is read with.
                line = (start < -3
                        ? "\u00B7".repeat(-3 - start) + "   "
                        : " ".repeat(-start)) +
                       new CharSlice(fb, 0, end);
            } else {
                line = new CharSlice(fb, start, end - start);
            }
        }
        return line;
    }

    /**
     * @return The line number where this token is.
     */
    public int lineNo() {
        return lineNo;
    }

    /**
     * @return The line position where the token starts.
     */
    public int linePos() {
        return linePos;
    }

    /**
     * Get the whole slice as a string. This assumes a single leading and
     * closing quote symbol, which will be ignored.
     *
     * @param strict If it should validate string content strictly.
     * @return Slice decoded as string handling escaped characters.
     * @throws IllegalStateException If the token is shorter than 2 chars, so
     *         it cannot contain both the leading and closing quote.
     */
    public String decodeString(boolean strict) {
        if (len < 2) {
            throw new IllegalStateException("Too short for string decoding, len " + len + " string '" + toString() + "'");
        }
        return javaUnEscape(subSlice(1, -1), strict);
    }

    /**
     * @param symbol The symbol char to check.
     * @return If the token is this symbol, meaning it is exactly one char
     *         long and that char equals the symbol.
     */
    public boolean isSymbol(char symbol) {
        return len == 1 && fb[off] == symbol;
    }

    // --- Object ---

    /**
     * Tokens are equal if they are of the exact same class, have the same
     * string content, offset, length, type, line number and line position.
     * Buffer content outside of the token itself is not compared.
     *
     * @param o The object to compare with.
     * @return True if the other object is an equal token.
     */
    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (o == null || !o.getClass().equals(getClass())) {
            return false;
        }
        Token<?> other = (Token<?>) o;

        return toString().equals(other.toString()) &&
               off == other.off &&
               len == other.len &&
               // Value equality, not identity: 'Type' is not required to be
               // an enum, and hashCode() uses type.hashCode().
               Objects.equals(type, other.type) &&
               lineNo == other.lineNo &&
               linePos == other.linePos;
    }

    /**
     * Hash over exactly the values compared in {@link #equals(Object)}. The
     * token's string content is hashed, not the whole backing buffer, so that
     * equal tokens over different buffers get equal hash codes.
     *
     * @return The token hash code.
     */
    @Override
    public int hashCode() {
        return Objects.hash(getClass(), toString(), off, len, type, lineNo, linePos);
    }

    // --- PRIVATE ---
    /** Token type. */
    protected final Type type;
    /** Line number. */
    protected final int  lineNo;
    /** Line position. */
    protected final int  linePos;

    /** Lazily computed line content, see {@link #line()}. */
    private transient CharSequence line;
}