ThriftTokenizer.java

/*
 * Copyright 2016 Providence Authors
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package net.morimekta.providence.reflect.parser;

import net.morimekta.providence.serializer.pretty.PrettyTokenizer;
import net.morimekta.util.Strings;
import net.morimekta.util.io.Utf8StreamReader;
import net.morimekta.util.lexer.TokenizerBase;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;

/**
 * Specialization of the 'pretty' tokenizer to make it handle some
 * special cases only applicable when parsing thrift files, but not
 * allowed in pretty format or config files.
 */
public class ThriftTokenizer extends TokenizerBase<ThriftTokenType, ThriftToken> {
    // Various thrift keywords.
    public static final String kNamespace = "namespace";
    public static final String kInclude   = "include";
    public static final String kTypedef   = "typedef";
    public static final String kEnum      = "enum";
    public static final String kStruct    = "struct";
    public static final String kUnion     = "union";
    public static final String kException = "exception";
    public static final String kInterface = "interface";
    public static final String kConst     = "const";
    public static final String kService   = "service";

    public static final String kExtends  = "extends";
    public static final String kVoid     = "void";
    public static final String kOneway   = "oneway";
    public static final String kThrows   = "throws";
    public static final String kRequired = "required";
    public static final String kOptional = "optional";

    public static final String kImplements = "implements";
    public static final String kOf         = "of";

    // Part of constant value
    public static final String kNull = "null";

    public ThriftTokenizer(InputStream in) {
        this(new Utf8StreamReader(in));
    }

    public ThriftTokenizer(Reader reader) {
        super(reader, PrettyTokenizer.DEFAULT_BUFFER_SIZE, true);
    }

    @Override
    protected ThriftToken genericToken(char[] buffer,
                                       int offset,
                                       int len,
                                       @Nonnull ThriftTokenType type,
                                       int lineNo,
                                       int linePos) {
        return new ThriftToken(buffer, offset, len, type, lineNo, linePos);
    }

    @Nullable
    @Override
    public ThriftToken parseNextToken() throws IOException {
        ThriftToken token = super.parseNextToken();
        while (token != null && token.type() == ThriftTokenType.COMMENT) {
            token = super.parseNextToken();
        }
        return token;
    }

    @Override
    protected ThriftToken identifierToken(char[] buffer, int offset, int len, int lineNo, int linePos) {
        return new ThriftToken(buffer, offset, len, ThriftTokenType.IDENTIFIER, lineNo, linePos);
    }

    @Override
    protected ThriftToken stringToken(char[] buffer, int offset, int len, int lineNo, int linePos) {
        return new ThriftToken(buffer, offset, len, ThriftTokenType.STRING, lineNo, linePos);
    }

    @Override
    protected ThriftToken numberToken(char[] buffer, int offset, int len, int lineNo, int linePos) {
        return new ThriftToken(buffer, offset, len, ThriftTokenType.NUMBER, lineNo, linePos);
    }

    @Override
    protected ThriftToken symbolToken(char[] buffer, int offset, int len, int lineNo, int linePos) {
        return new ThriftToken(buffer, offset, len, ThriftTokenType.SYMBOL, lineNo, linePos);
    }

    @Override
    protected boolean startString() {
        return lastChar == '\"' || lastChar == '\'';
    }

    @Nonnull
    @Override
    protected ThriftToken nextSymbol() throws IOException {
        if (lastChar == '/') {
            int startOffset = bufferOffset;
            int startLinePos = linePos;

            if (!readNextChar()) {
                throw eofFailure("Expected java-style comment, got end of file");
            }
            if (lastChar == '/') {
                lastChar = 0;
                ThriftToken token = readUntil("\n", ThriftTokenType.COMMENT, true);
                if (token == null) {
                    // empty comment;
                    return genericToken("".toCharArray(), 0, 0, ThriftTokenType.COMMENT, lineNo, startLinePos + 2);
                }
                return token;
            }
            if (lastChar == '*') {
                lastChar = 0;
                ThriftToken token = readUntil("*/", ThriftTokenType.DOCUMENTATION, false);
                if (token == null) {
                    // empty comment;
                    return genericToken("".toCharArray(), 0, 0, ThriftTokenType.COMMENT, lineNo, startLinePos + 2);
                }
                return token;
            }

            ThriftToken token = symbolToken(buffer, startOffset, 2, lineNo, startLinePos);
            throw failure(token, "Expected java-style comment, got '%s' after '/'",
                          Strings.escape((char) lastChar));
        }
        return super.nextSymbol();
    }

    @Override
    protected boolean allowIdentifier(int last) {
        return last == '-' || super.allowIdentifier(last);
    }
}