PrettyTokenizer.java

/*
 * Copyright 2015-2016 Providence Authors
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package net.morimekta.providence.serializer.pretty;

import net.morimekta.util.io.Utf8StreamReader;
import net.morimekta.util.lexer.TokenizerBase;

import javax.annotation.Nonnull;
import java.io.InputStream;
import java.io.Reader;

/**
 * Simple tokenizer for the pretty serializer that strips away comments based
 * on the "#" (shell) comment character. Each comment lasts until the next
 * newline.
 */
public class PrettyTokenizer extends TokenizerBase<PrettyTokenType, PrettyToken> {
    /**
     * Create a pretty tokenizer reading from an input stream, using the
     * inherited {@code DEFAULT_BUFFER_SIZE} for the char buffer. The stream
     * is wrapped in a {@link Utf8StreamReader} and the content is not
     * pre-loaded.
     *
     * @param in Input stream to parse from.
     */
    public PrettyTokenizer(InputStream in) {
        this(new Utf8StreamReader(in), DEFAULT_BUFFER_SIZE, false);
    }

    /**
     * Create a pretty tokenizer reading from an input stream with an explicit
     * buffer size. The stream is wrapped in a {@link Utf8StreamReader} and
     * the content is not pre-loaded.
     *
     * @param in Input stream to parse from.
     * @param bufferSize The size of the char buffer. Default is 2048 chars
     *                   (4096 bytes).
     */
    public PrettyTokenizer(InputStream in, int bufferSize) {
        this(new Utf8StreamReader(in), bufferSize, false);
    }

    /**
     * Create a pretty tokenizer on top of a reader. All arguments are handed
     * unchanged to the {@link TokenizerBase} constructor.
     *
     * <p>NOTE(review): the original implementation comment states that a line
     * longer than 2k will not be used in error messages; that behavior lives
     * in the base class and should be confirmed there.
     *
     * @param in Reader of content to parse.
     * @param bufferSize The size of the char buffer. Default is 2048 chars
     *                   (4096 bytes).
     * @param preLoadAll Load all content up front. Handy for config and thrift
     *                   program files.
     */
    public PrettyTokenizer(Reader in, int bufferSize, boolean preLoadAll) {
        super(in, bufferSize, preLoadAll);
    }

    /**
     * Check whether {@code lastChar} is a quote character that starts a
     * string. Both single and double quotes are accepted.
     *
     * @return True if {@code lastChar} is {@code '} or {@code "}.
     */
    @Override
    protected boolean startString() {
        if (lastChar == '\'') {
            return true;
        }
        return lastChar == '\"';
    }

    /**
     * Make a token of the type requested by the caller.
     */
    @Override
    protected PrettyToken genericToken(char[] buffer, int offset, int len, @Nonnull PrettyTokenType type, int lineNo, int linePos) {
        return prettyTokenOf(type, buffer, offset, len, lineNo, linePos);
    }

    /**
     * Make a token of type {@link PrettyTokenType#IDENTIFIER}.
     */
    @Override
    protected PrettyToken identifierToken(char[] buffer, int offset, int len, int lineNo, int linePos) {
        return prettyTokenOf(PrettyTokenType.IDENTIFIER, buffer, offset, len, lineNo, linePos);
    }

    /**
     * Make a token of type {@link PrettyTokenType#STRING}.
     */
    @Override
    protected PrettyToken stringToken(char[] buffer, int offset, int len, int lineNo, int linePos) {
        return prettyTokenOf(PrettyTokenType.STRING, buffer, offset, len, lineNo, linePos);
    }

    /**
     * Make a token of type {@link PrettyTokenType#NUMBER}.
     */
    @Override
    protected PrettyToken numberToken(char[] buffer, int offset, int len, int lineNo, int linePos) {
        return prettyTokenOf(PrettyTokenType.NUMBER, buffer, offset, len, lineNo, linePos);
    }

    /**
     * Make a token of type {@link PrettyTokenType#SYMBOL}.
     */
    @Override
    protected PrettyToken symbolToken(char[] buffer, int offset, int len, int lineNo, int linePos) {
        return prettyTokenOf(PrettyTokenType.SYMBOL, buffer, offset, len, lineNo, linePos);
    }

    /**
     * Single construction point shared by all the token factory methods.
     * Static and private so the factory overrides never dispatch through
     * another overridable method.
     */
    private static PrettyToken prettyTokenOf(PrettyTokenType type, char[] buffer, int offset, int len, int lineNo, int linePos) {
        return new PrettyToken(buffer, offset, len, type, lineNo, linePos);
    }
}