LineBufferedReader.java

/*
 * Copyright (c) 2017, Stein Eldar Johnsen
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package net.morimekta.strings.io;

import java.io.CharArrayWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Helper class that manages a buffer of 1 line, or if requested pre-loads the
 * whole content of the input reader, and pretends as if it was the same
 * line-buffering reader. The whole point of this class is to be a base for
 * tokenizer classes for JSON and other simple text-based syntax parsers.
 *
 * The mix of the Reader and the line buffering enables the tokenizer to be pretty
 * efficient (minimal data conversion) while having access to a stream of
 * characters to be parsed. For this reason all of the internal state fields
 * are made protected, though extending classes should not assign any fields
 * but the 'lastChar'.
 *
 * The {@code lastChar} field holds one of three kinds of value:
 * a positive value is a char that has been read but not yet consumed,
 * {@code 0} means the last char was consumed, and a negative value means
 * that the end of the input has been reached. As a consequence a NUL char
 * ({@code '\0'}) in the input cannot be held as a pending char.
 */
public class LineBufferedReader extends Reader {
    /**
     * @param reader Reader to read from.
     */
    public LineBufferedReader(Reader reader) {
        this(reader, DEFAULT_LINE_BUFFER_SIZE);
    }

    /**
     * @param reader Reader to read from.
     * @param bufferSize Size of in-memory buffer.
     */
    public LineBufferedReader(Reader reader, int bufferSize) {
        this(reader, bufferSize, false);
    }

    /**
     * @param reader Reader to read from.
     * @param preLoadAll If all content of the reader should be loaded at once.
     */
    public LineBufferedReader(Reader reader, boolean preLoadAll) {
        this(reader, DEFAULT_LINE_BUFFER_SIZE, preLoadAll);
    }

    /**
     * @param reader Reader to read from.
     * @param bufferSize Size of in-memory buffer. Must be at least 1. When
     *                   pre-loading this is only the size of the chunks used
     *                   while reading the content.
     * @param preLoadAll If all content of the reader should be loaded at once.
     * @throws IllegalArgumentException If the buffer size is less than 1.
     * @throws UncheckedIOException If pre-loading the content fails.
     */
    public LineBufferedReader(Reader reader, int bufferSize, boolean preLoadAll) {
        if (bufferSize < 1) {
            // A zero sized buffer can never hold a char, so the reader would
            // silently behave as if the input was empty.
            throw new IllegalArgumentException("Invalid buffer size: " + bufferSize);
        }

        this.reader = reader;
        this.lineNo = 0;
        this.linePos = 0;
        this.bufferOffset = -1;
        this.bufferLineEnd = false;
        this.preLoaded = preLoadAll;

        if (preLoaded) {
            try {
                CharArrayWriter writer = new CharArrayWriter();
                char[] tmp = new char[bufferSize];
                int r;
                while ((r = reader.read(tmp)) > 0) {
                    writer.write(tmp, 0, r);
                }
                this.buffer = writer.toCharArray();
                this.bufferLimit = buffer.length;
            } catch (IOException e) {
                throw new UncheckedIOException(e.getMessage(), e);
            }
        } else {
            this.buffer = new char[bufferSize];
            this.bufferLimit = -1;
        }

        this.lastChar = 0;
    }

    /**
     * Read a single char. If there is a pending (read but not consumed) char,
     * that char is returned, otherwise the next char is read. The returned
     * char is marked as consumed.
     *
     * @return The char read, or -1 if the end of the input is reached.
     * @throws IOException If unable to read from the underlying reader.
     */
    @Override
    public int read() throws IOException {
        if (lastChar > 0 || readNextChar()) {
            int ret = lastChar;
            lastChar = 0;
            return ret;
        }
        return -1;
    }

    /**
     * Read chars into the given array. A pending (read but not consumed) char
     * is returned as the first char.
     *
     * @param chars Char array to read into.
     * @param off Offset in the array of the first char to write.
     * @param len Maximum number of chars to read.
     * @return The number of chars read, 0 if len is 0, or -1 if no chars
     *         could be read because the end of the input is reached.
     * @throws IllegalArgumentException If the offset or length is negative, or
     *                                  goes outside of the char array.
     * @throws IOException If unable to read from the underlying reader.
     */
    @Override
    public int read(char[] chars, int off, int len) throws IOException {
        if (off < 0 || len < 0) {
            throw new IllegalArgumentException("off: " + off + " len: " + len);
        }
        // Written as a subtraction so that 'off + len' cannot overflow.
        if (len > chars.length - off) {
            throw new IllegalArgumentException("off: " + off + " len: " + len + " > char[" + chars.length + "]");
        }
        if (len == 0) {
            // Nothing requested: must not consume the pending char, and must
            // not clear the end-of-input marker.
            return 0;
        }

        int r = 0;
        if (lastChar > 0) {
            chars[off] = (char) lastChar;
            ++r;
        }

        while (r < len && readNextChar()) {
            chars[off + r] = (char) lastChar;
            ++r;
        }
        if (r == len) {
            // Filled the requested range, so the last char is consumed.
            // Otherwise readNextChar() has marked the end of the input.
            lastChar = 0;
        }
        // The Reader contract requires -1, not 0, at the end of the stream.
        return r > 0 ? r : -1;
    }

    /**
     * Does nothing. Note that the underlying reader is not closed by this
     * method.
     */
    @Override
    public void close() {
        // ignore.
    }

    /**
     * @return The current line number. Starts at 1.
     */
    public int getLineNo() {
        return lineNo;
    }

    /**
     * The position of the current (last read) char in the current line.
     * Shows number of chars read since last newline, so will act as a
     * 1-indexed position.
     *
     * @return The char position.
     */
    public int getLinePos() {
        return linePos;
    }

    /**
     * Returns the current line in the buffer. Or empty string if not usable.
     *
     * @return The line string, not including the line-break.
     */
    public String getLine() {
        if (preLoaded) {
            if (bufferOffset >= 0 && bufferLimit > 0) {
                // Walk back to the start of the line using the line position.
                int lineStart = bufferOffset;
                if (linePos > 1) {
                    lineStart -= (linePos - 1);
                }
                // And forward to the next newline, or the end of the content.
                int lineEnd = bufferOffset;
                if (lineEnd > bufferLimit) {
                    --lineEnd;
                }
                while (lineEnd < bufferLimit && buffer[lineEnd] != '\n') {
                    ++lineEnd;
                }
                return new String(buffer, lineStart, lineEnd - lineStart);
            }
        } else if (bufferLimit > 0) {
            if (Math.abs((linePos - 1) - bufferOffset) < 2) {
                // only return the line if the line has not been consolidated before the
                // exception. This should avoid showing a bad exception line pointing to
                // the wrong content. This should never be the case in pretty-printed
                // JSON unless some really really long strings are causing the error.
                //
                // Since linePos does not exactly follow offset, we must accept +- 1.
                return new String(buffer, 0, bufferLimit - (bufferLineEnd ? 1 : 0));
            }
        }

        // Otherwise we don't have the requested line, return empty string.
        return "";
    }

    /**
     * Return the rest of the current line. This is handy for handling unwanted content
     * after the last expected token or character.
     *
     * If there is a pending (read but not consumed) char, it is included as the
     * first char of the result. The line-break ending the line is consumed, but
     * is not part of the result.
     *
     * @return The rest of the last read line, not including the line-break. The
     *         content is returned as is, no whitespace is stripped. Returns the
     *         empty string if nothing has been read yet, if the current char is
     *         the line-break, or if the end of the input is reached.
     * @throws IOException If unable to read the rest of the line.
     */
    public String getRestOfLine() throws IOException {
        if (bufferOffset < 0) {
            // Nothing has been read, so there is no current line.
            return "";
        }

        if (preLoaded) {
            // The offset is at the buffer limit after the end of the input is
            // reached, so it must be checked before indexing the buffer.
            if (bufferOffset >= bufferLimit || buffer[bufferOffset] == '\n') {
                return "";
            }
            if (lastChar == 0) {
                if (!readNextChar()) {
                    return "";
                }
                if (lastChar == '\n') {
                    // The consumed char was the last on the line. Consume the
                    // newline the same way as when it is found in the loop below.
                    lastChar = 0;
                    return "";
                }
            }

            int start = bufferOffset;
            int length = 1;
            while (readNextChar()) {
                if (lastChar == '\n') {
                    lastChar = 0;
                    break;
                }
                ++length;
            }
            return new String(buffer, start, length);
        }

        // As above, the bounds check must come before indexing the buffer. When
        // at the last char of the buffer, readNextChar() will either load the
        // continuation of the line, or detect the end of the input.
        if (bufferOffset >= bufferLimit ||
            buffer[bufferOffset] == '\n' ||
            (lastChar == 0 && !readNextChar())) {
            return "";
        }

        maybeConsolidateBuffer();
        StringBuilder remainderBuilder = new StringBuilder();
        do {
            // The buffer offset points at the first char not yet handed out.
            int unreadChars = bufferLimit - bufferOffset;
            remainderBuilder.append(buffer, bufferOffset, unreadChars - (bufferLineEnd ? 1 : 0));
            bufferOffset = bufferLimit;
            linePos += unreadChars;

            if (bufferLineEnd) {
                break;
            }
            // If the buffer was full the line may continue, so try to load more.
            maybeConsolidateBuffer();
            // Continue as long as there is newly loaded content, even if it
            // is just a single char.
        } while (bufferOffset < bufferLimit);

        lastChar = 0;
        return remainderBuilder.toString();
    }

    /**
     * Read the rest of input from the reader, and get the lines from there.
     * This will consume the rest of the content of the reader. Reading starts
     * at the next unread char, so any unread content of the current line
     * becomes the first entry of the result.
     *
     * @param trimAndSkipEmpty If lines should be trimmed and empty lines should
     *                         be skipped.
     * @return List of the remaining lines, not including the line-breaks.
     * @throws IOException When failing to read stream to end.
     */
    public List<String> getRemainingLines(boolean trimAndSkipEmpty) throws IOException {
        List<String> out = new ArrayList<>();

        StringBuilder builder = new StringBuilder();
        while (readNextChar()) {
            if (lastChar == '\n') {
                addLine(out, builder.toString(), trimAndSkipEmpty);
                builder.setLength(0);
            } else {
                builder.append((char) lastChar);
            }
        }
        if (builder.length() > 0) {
            // Last line, not terminated with a line-break. It is handled the
            // same way as the other lines, so it is also trimmed when requested.
            addLine(out, builder.toString(), trimAndSkipEmpty);
        }
        return out;
    }

    // -------------------------------
    // --         PROTECTED         --
    // -------------------------------

    /** Reader read from. */
    protected final Reader  reader;
    /** Current line buffer, or the whole content if pre-loaded. */
    protected final char[]  buffer;
    /** If the content is pre-loaded. */
    protected final boolean preLoaded;

    /** Number of chars current buffer is limited to. */
    protected int     bufferLimit;
    /** If the buffer ends in a line end char. */
    protected boolean bufferLineEnd;
    /** Current offset in the buffer, -1 before the first char is read. */
    protected int     bufferOffset;
    /** Current line number (1-indexed), 0 before the first char is read. */
    protected int     lineNo;
    /** Current line position (1-indexed), 0 before the first char of a line is read. */
    protected int     linePos;
    /** Current char in the buffer, 0 if consumed, negative at end of input. */
    protected int     lastChar;

    /**
     * If the char buffer is nearing its "end" and does not end with a newline
     * (meaning it is not a complete line), then take the rest of the current buffer
     * and move it to the front of the buffer, and read until end of buffer, or
     * end of line.
     *
     * Does nothing if the content is pre-loaded, if the buffer is not full, or
     * if the buffer offset is more than {@code CONSOLIDATE_LINE_ON} chars from
     * the end of the buffer.
     *
     * @throws IOException On IO errors.
     */
    protected void maybeConsolidateBuffer() throws IOException {
        if (bufferLimit == buffer.length &&
            bufferOffset > 0 &&
            bufferOffset >= (buffer.length - CONSOLIDATE_LINE_ON) &&
            !preLoaded &&
            !bufferLineEnd) {

            // A: copy the remainder to the start of the buffer.
            int len = bufferLimit - bufferOffset;
            if (len > 0) {
                System.arraycopy(buffer, bufferOffset, buffer, 0, len);
            }

            // B: fill up the buffer after the remainder, stopping after the
            //    first newline, or at the end of the input.
            int off = len;
            char[] b = new char[1];
            while (off < buffer.length && reader.read(b, 0, 1) > 0) {
                char ch = b[0];
                buffer[off] = ch;
                ++off;
                if (ch == '\n') {
                    bufferLineEnd = true;
                    break;
                }
            }

            // C: the char that was at the old offset is now first in the buffer.
            bufferOffset = 0;
            bufferLimit = off;
        }
    }

    /**
     * Read the next char. On success the char is available in 'lastChar', and
     * the buffer offset and line position are updated to match it. When there
     * are no more chars 'lastChar' is set to -1, and all later calls return
     * false until 'lastChar' is assigned a non-negative value.
     *
     * @return If a new char is available.
     * @throws IOException If unable to read from stream.
     */
    protected boolean readNextChar() throws IOException {
        if (lastChar < 0) return false;
        if (preLoaded) {
            // Moving past a newline, or reading the very first char, starts a new line.
            if (bufferOffset < bufferLimit && (bufferOffset < 0 || buffer[bufferOffset] == '\n')) {
                ++lineNo;
                linePos = 0;
            }
            if (bufferOffset >= (bufferLimit - 1)) {
                // No more content.
                ++linePos;
                ++bufferOffset;
                lastChar = -1;
                return false;
            }
        } else {
            // Nothing loaded yet, or at the last char of the buffer.
            if (bufferOffset < 0 || bufferOffset >= (bufferLimit - 1)) {
                if (!readNextLine()) {
                    ++bufferOffset;
                    lastChar = -1;
                    // not valid JSON string char.
                    return false;
                }
            }
        }
        ++linePos;
        lastChar = buffer[++bufferOffset];
        return true;
    }

    // -------------------------------
    // --          PRIVATE          --
    // -------------------------------

    private static final int CONSOLIDATE_LINE_ON      = 1 << 7;   //  128 chars
    private static final int DEFAULT_LINE_BUFFER_SIZE = 1 << 10;  // 1024 chars --> 2kb

    /**
     * Add a line to the output list.
     *
     * @param out The list to add the line to.
     * @param line The line content, without line-break.
     * @param trimAndSkipEmpty If the line should be trimmed, and skipped if it
     *                         is empty after trimming.
     */
    private static void addLine(List<String> out, String line, boolean trimAndSkipEmpty) {
        if (trimAndSkipEmpty) {
            String trimmed = line.trim();
            if (!trimmed.isEmpty()) {
                out.add(trimmed);
            }
        } else {
            out.add(line);
        }
    }

    /**
     * Load the next line, or the next part of a line longer than the buffer,
     * into the start of the buffer. Reads until the buffer is full, a newline
     * is read, or the end of the input is reached. On success the buffer
     * offset is set to -1, so no char of the new content is current.
     *
     * @return True if any chars were loaded.
     * @throws IOException If unable to read from the reader.
     */
    private boolean readNextLine() throws IOException {
        if (bufferLimit > 0 && !bufferLineEnd) {
            // check for "last line"
            if (bufferLimit < buffer.length) {
                // The previous read stopped without filling the buffer and
                // without a newline, so it stopped at the end of the input.
                ++linePos;
                return false;
            }
            // Otherwise this is a continuation of the same line.
        } else {
            ++lineNo;
            linePos = 0;
        }

        bufferLineEnd = false;

        int off = 0;
        int b;
        while (off < buffer.length && (b = reader.read()) >= 0) {
            buffer[off] = (char) b;
            ++off;
            if (b == '\n') {
                bufferLineEnd = true;
                break;
            }
        }
        if (off > 0) {
            if (off < buffer.length) {
                // Terminate the content, as older content may follow it.
                buffer[off] = 0;
            }
            bufferOffset = -1;
            bufferLimit = off;
            return true;
        }
        ++linePos;
        return false;
    }
}