TokenizerBase.java

  1. /*
  2.  * Copyright (c) 2015-2020, Stein Eldar Johnsen
  3.  *
  4.  * Licensed to the Apache Software Foundation (ASF) under one
  5.  * or more contributor license agreements. See the NOTICE file
  6.  * distributed with this work for additional information
  7.  * regarding copyright ownership. The ASF licenses this file
  8.  * to you under the Apache License, Version 2.0 (the
  9.  * "License"); you may not use this file except in compliance
  10.  * with the License. You may obtain a copy of the License at
  11.  *
  12.  *   http://www.apache.org/licenses/LICENSE-2.0
  13.  *
  14.  * Unless required by applicable law or agreed to in writing,
  15.  * software distributed under the License is distributed on an
  16.  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  17.  * KIND, either express or implied. See the License for the
  18.  * specific language governing permissions and limitations
  19.  * under the License.
  20.  */
  21. package net.morimekta.lexer;

  22. import net.morimekta.strings.ConsoleUtil;
  23. import net.morimekta.strings.io.LineBufferedReader;

  24. import java.io.IOException;
  25. import java.io.Reader;

  26. import static java.util.Objects.requireNonNull;
  27. import static net.morimekta.strings.EscapeUtil.javaEscape;

  28. /**
  29.  * Base tokenizer used around providence. The base tokenizer supports the
  30.  * minimum of what all the tokenizer implementations should require, so that
  31.  * each will mostly expand support, not much reduce it.
  32.  *
  33.  * @param <TT> TokenType type.
  34.  * @param <T> Token type.
  35.  */
  36. public abstract class TokenizerBase<TT, T extends Token<TT>>
  37.         extends LineBufferedReader
  38.         implements Tokenizer<TT, T> {
  39.     /** Default buffer size, if not specified: 2048 chars / 4 kB. */
  40.     public static final int DEFAULT_BUFFER_SIZE = 1 << 11; // 2048 chars --> 4kB

  /**
   * Creates a tokenizer reading from the given reader. All arguments are
   * handed unchanged to the {@link LineBufferedReader} constructor.
   *
   * @param in Reader to take the content from.
   * @param bufferSize The size of the read-buffer.
   * @param preLoadAll Whether the whole content of the reader should be
   *                   pre-loaded. If this is true, then all token content
   *                   can be considered immutable.
   */
  protected TokenizerBase(Reader in, int bufferSize, boolean preLoadAll) {
    super(in, bufferSize, preLoadAll);
  }

  56.     /**
  57.      * Creates a lexer exception from a token with a message. This is an overridable
  58.      * method in order to be able to change the exception class from the get-go in
  59.      * the tokenizer.
  60.      *
  61.      * @param token The token to cause the exception.
  62.      * @param message The exception message.
  63.      * @param params If provided will be used with {@link String#format(String, Object...)} to
  64.      *               make a readable exception message.
  65.      * @return The resulting exception.
  66.      */
  67.     public LexerException failure(T token,
  68.                                   String message,
  69.                                   Object... params) {
  70.         if (params.length > 0) {
  71.             message = String.format(message, params);
  72.         }
  73.         return new LexerException(token, message);
  74.     }

  75.     /**
  76.      * Creates an EOF lexer exception from a token with a message. This is an overridable
  77.      * method in order to be able to change the exception class from the get-go in
  78.      * the tokenizer. This is similar to {@link #failure(Token, String, Object...)} but
  79.      * does not provide a token, and assumes the tokenizer is at the end of the stream.
  80.      *
  81.      * @param message The exception message.
  82.      * @param params If provided will be used with {@link String#format(String, Object...)} to
  83.      *               make a readable exception message.
  84.      * @return The resulting exception.
  85.      */
  86.     protected LexerException eofFailure(String message,
  87.                                         Object... params) {
  88.         if (params.length > 0) {
  89.             message = String.format(message, params);
  90.         }
  91.         return new LexerException(currentLine(), currentLineNo(), currentLinePos(), 1, message);
  92.     }

  93.     /**
  94.      * Create an identifier token. This token will always start with a character accepted
  95.      * by {@link #startIdentifier()}.
  96.      *
  97.      * @param buffer The char buffer to be wrapped in the token.
  98.      * @param offset The buffer offset for start of token.
  99.      * @param len The token length.
  100.      * @param lineNo The line number of the token position.
  101.      * @param linePos The line position of the token.
  102.      * @return The created token.
  103.      */
  104.     protected abstract T identifierToken(char[] buffer, int offset, int len, int lineNo, int linePos);

  105.     /**
  106.      * Create a string token. This token will always start and end with a character accepted
  107.      * by {@link #startString()}.
  108.      *
  109.      * @param buffer The char buffer to be wrapped in the token.
  110.      * @param offset The buffer offset for start of token.
  111.      * @param len The token length.
  112.      * @param lineNo The line number of the token position.
  113.      * @param linePos The line position of the token.
  114.      * @return The created token.
  115.      */
  116.     protected abstract T stringToken(char[] buffer, int offset, int len, int lineNo, int linePos);

  117.     /**
  118.      * Create a number token. This token will always start with a character accepted
  119.      * by {@link #startNumber()}.
  120.      *
  121.      * @param buffer The char buffer to be wrapped in the token.
  122.      * @param offset The buffer offset for start of token.
  123.      * @param len The token length.
  124.      * @param lineNo The line number of the token position.
  125.      * @param linePos The line position of the token.
  126.      * @return The created token.
  127.      */
  128.     protected abstract T numberToken(char[] buffer, int offset, int len, int lineNo, int linePos);

  129.     /**
  130.      * Create a symbol token. This token will always start with a character accepted
  131.      * by {@link #startSymbol()}.
  132.      *
  133.      * @param buffer The char buffer to be wrapped in the token.
  134.      * @param offset The buffer offset for start of token.
  135.      * @param len The token length.
  136.      * @param lineNo The line number of the token position.
  137.      * @param linePos The line position of the token.
  138.      * @return The created token.
  139.      */
  140.     protected abstract T symbolToken(char[] buffer, int offset, int len, int lineNo, int linePos);

  141.     /**
  142.      * Create a generic token. This is called from {@link #readUntil(char, Object, boolean)}, and
  143.      * can be used if at some point a custom token type should be created.
  144.      *
  145.      * @param buffer The char buffer to be wrapped in the token.
  146.      * @param offset The buffer offset for start of token.
  147.      * @param len The token length.
  148.      * @param type The type of the token.
  149.      * @param lineNo The line number of the token position.
  150.      * @param linePos The line position of the token.
  151.      * @return The created token.
  152.      */
  153.     protected abstract T genericToken(char[] buffer, int offset, int len, TT type, int lineNo, int linePos);

  154.     // --- Token start char detection.

  155.     /**
  156.      * Whitespace characters are generally ignored, and assumed to have no meaning.
  157.      *
  158.      * @return If the last char is a whitespace. See {@link #lastChar}.
  159.      */
  160.     protected boolean isWhitespace() {
  161.         return lastChar == '\n' ||
  162.                lastChar == '\r' ||
  163.                lastChar == '\t' ||
  164.                lastChar == ' ';
  165.     }


  166.     /**
  167.      * @return If the last character is the valid start of a number. See {@link #lastChar}.
  168.      */
  169.     protected boolean startNumber() {
  170.         return lastChar == '.' ||
  171.                lastChar == '-' ||
  172.                (lastChar >= '0' && lastChar <= '9');
  173.     }

  174.     /**
  175.      * @return If the last character is the valid start of a string. See {@link #lastChar}.
  176.      */
  177.     protected boolean startString() {
  178.         return lastChar == '\"';
  179.     }

  180.     /**
  181.      * @return If the last character is the valid start of an identifier. See {@link #lastChar}.
  182.      */
  183.     protected boolean startIdentifier() {
  184.         return '_' == lastChar ||
  185.                ('a' <= lastChar && lastChar <= 'z') ||
  186.                ('A' <= lastChar && lastChar <= 'Z');
  187.     }

  188.     /**
  189.      * @return If the last character is the valid start of a symbol. See {@link #lastChar}.
  190.      */
  191.     protected boolean startSymbol() {
  192.         return lastChar != '#' &&
  193.                lastChar >= 0x20 &&
  194.                lastChar < 0x7F;
  195.     }

  196.     // --- Internal token allowance helpers.

  197.     /**
  198.      * @param last Character to check.
  199.      * @return If the character is allowed as part of an identifier.
  200.      */
  201.     protected boolean allowIdentifier(int last) {
  202.         return '_' == last || '.' == last ||
  203.                ('a' <= last && last <= 'z') ||
  204.                ('A' <= last && last <= 'Z') ||
  205.                ('0' <= last && last <= '9');
  206.     }

  207.     /**
  208.      * Rules for identifiers is to be a separator joined string of
  209.      * allowed identifier chars. The separator cannot be directly repeated,
  210.      * and the identifier cannot end with a separator.
  211.      *
  212.      * @param last Character to check.
  213.      * @return If the character is separating parts of the identifier.
  214.      */
  215.     protected boolean identifierSeparator(int last) {
  216.         return last == '.';
  217.     }

  218.     /**
  219.      * @param last Character to check.
  220.      * @return If the character is allowed as a type-marker for a integer.
  221.      */
  222.     protected boolean allowAfterInteger(int last) {
  223.         return false;
  224.     }

  225.     /**
  226.      * @param last Character to check
  227.      * @return If the character is allowed as a type-marker for a floating point
  228.      *         number. This is also checked after integers.
  229.      */
  230.     protected boolean allowAfterFloatingPoint(int last) {
  231.         return false;
  232.     }

  233.     // --- CONSUME ---

  234.     /**
  235.      * This method is called when all other start-of-token checks have
  236.      * been met and failed for any non-whitespace char.
  237.      *
  238.      * @return True if the character, or any number of characters was
  239.      *         consumed. False if nothing was consumed. This will result
  240.      *         in an unknown start of token exception.
  241.      * @throws IOException If unable to consume characters.
  242.      */
  243.     protected boolean maybeConsumeSilent() throws IOException {
  244.         // Cheap way to consume simple comments.
  245.         if (lastChar == '#') {
  246.             getRestOfLine();
  247.             lastChar = 0;
  248.             return true;
  249.         }
  250.         return false;
  251.     }

  252.     /**
  253.      * This method is protected so that it can be overridden to
  254.      * produce multi-symbol tokens if necessary.
  255.      *
  256.      * @return Symbol token.
  257.      * @throws IOException If unable to read token.
  258.      */
  259.     protected T nextSymbol() throws IOException {
  260.         lastChar = 0;
  261.         return symbolToken(buffer, bufferOffset, 1, lineNo, linePos);
  262.     }

  263.     // --- Tokenizer ---

  264.     @Override
  265.     public int currentLineNo() {
  266.         return this.lineNo;
  267.     }

  268.     @Override
  269.     public int currentLinePos() {
  270.         return this.linePos;
  271.     }

  272.     @Override
  273.     public CharSequence currentLine() {
  274.         return getLine();
  275.     }

  276.     @Override
  277.     public T readUntil(CharSequence terminator, TT type, boolean allowEof) throws IOException {
  278.         requireNonNull(terminator, "terminator == null");
  279.         requireNonNull(type, "type == null");

  280.         maybeConsolidateBuffer();
  281.         if (lastChar == 0) {
  282.             readNextChar();
  283.         }

  284.         if (terminator.length() == 1) {
  285.             return readUntil(terminator.charAt(0), type, allowEof);
  286.         }

  287.         int startOffset = bufferOffset;
  288.         final int startLineNo = lineNo;
  289.         final int startLinePos = linePos;
  290.         final int termLen = terminator.length();
  291.         final int last = terminator.charAt(terminator.length() - 1);

  292.         int len = 0;
  293.         int total = 0;
  294.         StringBuilder consolidated = null;
  295.         while (lastChar >= 0) {
  296.             ++len;
  297.             ++total;

  298.             if (total >= termLen && lastChar == last) {
  299.                 boolean cont = false;
  300.                 for (int i = termLen - 1; i >= 0; --i) {
  301.                     final int bp = bufferOffset + 1 - termLen + i;
  302.                     if (bp < 0) {
  303.                         if (consolidated == null) throw new IllegalStateException("Bad line consolidation");
  304.                         if (consolidated.charAt(consolidated.length() + bp) != terminator.charAt(i)) {
  305.                             cont = true;
  306.                             break;
  307.                         }
  308.                     } else if (buffer[bp] != terminator.charAt(i)) {
  309.                         cont = true;
  310.                         break;
  311.                     }
  312.                 }
  313.                 if (!cont) {
  314.                     len -= terminator.length();
  315.                     break;
  316.                 }
  317.             }
  318.             if (!preLoaded && bufferOffset == (bufferLimit - 1)) {
  319.                 if (consolidated == null) {
  320.                     consolidated = new StringBuilder();
  321.                 }
  322.                 consolidated.append(buffer, startOffset, len);
  323.                 startOffset = 0;
  324.                 len = 0;
  325.             }

  326.             readNextChar();
  327.         }
  328.         if (lastChar <= 0 && !allowEof) {
  329.             throw eofFailure("End of file while reading until '%s'", javaEscape(terminator));
  330.         }
  331.         lastChar = 0;
  332.         if (consolidated != null) {
  333.             if (len > 0) {
  334.                 consolidated.append(buffer, startOffset, len);
  335.             } else if (len < 0) {
  336.                 consolidated.delete(consolidated.length() + len, consolidated.length());
  337.             }
  338.             return genericToken(consolidated.toString().toCharArray(),
  339.                                 0,
  340.                                 consolidated.length(),
  341.                                 type, startLineNo, startLinePos);
  342.         } else if (len > 0) {
  343.             return genericToken(buffer, startOffset, len, type, startLineNo, startLinePos);
  344.         }
  345.         return null;
  346.     }

  347.     // --- Object ---

  348.     @Override
  349.     public String toString() {
  350.         return getClass().getSimpleName() + "{preLoaded=" + preLoaded + "}";
  351.     }

  352.     private T readUntil(char terminator, TT type, boolean allowEof) throws IOException {
  353.         int startOffset = bufferOffset;
  354.         int startLineNo = lineNo;
  355.         int startLinePos = linePos;

  356.         int len = 0;
  357.         StringBuilder consolidated = null;
  358.         while (lastChar >= 0 && lastChar != terminator) {
  359.             ++len;
  360.             if (!preLoaded && bufferOffset == (bufferLimit - 1)) {
  361.                 if (consolidated == null) {
  362.                     consolidated = new StringBuilder();
  363.                 }
  364.                 consolidated.append(buffer, startOffset, len);
  365.                 startOffset = 0;
  366.                 len = 0;
  367.             }
  368.             readNextChar();
  369.         }
  370.         if (lastChar < 0 && !allowEof) {
  371.             throw eofFailure("End of file while reading until '%s'", javaEscape(terminator));
  372.         }
  373.         lastChar = 0;
  374.         if (consolidated != null) {
  375.             if (len > 0) {
  376.                 consolidated.append(buffer, startOffset, len);
  377.             }
  378.             return genericToken(consolidated.toString().toCharArray(),
  379.                                 0,
  380.                                 consolidated.length(),
  381.                                 type, startLineNo, startLinePos);
  382.         } else if (len > 0) {
  383.             return genericToken(buffer, startOffset, len, type, startLineNo, startLinePos);
  384.         }
  385.         return null;
  386.     }

  387.     @Override
  388.     public T parseNextToken() throws IOException {
  389.         while (lastChar >= 0) {
  390.             if (lastChar == 0) {
  391.                 if (!readNextChar()) {
  392.                     break;
  393.                 }
  394.             }

  395.             if (isWhitespace()) {
  396.                 // ignore
  397.                 lastChar = 0;
  398.             } else if (startIdentifier()) {
  399.                 return nextIdentifier();
  400.             } else if (startString()) {
  401.                 return nextString(lastChar);
  402.             } else if (startNumber()) {
  403.                 return nextNumber();
  404.             } else if (startSymbol()) {
  405.                 return nextSymbol();
  406.             } else if (!maybeConsumeSilent()) {
  407.                 // non-allowed characters not starting a valid token.
  408.                 T token = symbolToken(buffer, bufferOffset, 1, lineNo, linePos);
  409.                 throw failure(token, "Unknown token initiator '%s'", javaEscape((char) lastChar));
  410.             }
  411.         }

  412.         return null;
  413.     }

  414.     // --- INTERNAL ---

  415.     private T nextIdentifier() throws IOException {
  416.         maybeConsolidateBuffer();

  417.         int startPos = linePos;
  418.         int startOffset = bufferOffset;
  419.         int startLine = lineNo;
  420.         int len = 1;

  421.         int lastLast = lastChar;
  422.         if (!readNextChar()) {
  423.             return identifierToken(buffer, startOffset, len, startLine, startPos);
  424.         }

  425.         while (allowIdentifier(lastChar)) {
  426.             ++len;
  427.             if (lastChar == lastLast && identifierSeparator(lastLast)) {
  428.                 T token = identifierToken(buffer, startOffset, len, startLine, startPos);
  429.                 throw failure(token, "Identifier with double '.'");
  430.             } else if (identifierSeparator(lastLast) && !startIdentifier()) {
  431.                 T token = identifierToken(buffer, startOffset, len, startLine, startPos);
  432.                 throw failure(token, "Identifier part with invalid start '%c'", lastChar);
  433.             }
  434.             lastLast = lastChar;
  435.             if (!readNextChar()) {
  436.                 break;
  437.             }
  438.         }

  439.         if (identifierSeparator(lastLast)) {
  440.             T token = identifierToken(buffer, startOffset, len, startLine, startPos);
  441.             throw failure(token, "Identifier with trailing '%c'", (char) lastLast);
  442.         }

  443.         return identifierToken(buffer, startOffset, len, startLine, startPos);
  444.     }

  445.     private T nextNumber() throws IOException {
  446.         maybeConsolidateBuffer();
  447.         // NOTE: This code is pretty messy because it is a full state-engine
  448.         // to ensure that the parsed number follows the JSON number syntax.
  449.         // Alternatives are:
  450.         //
  451.         // dec = -?0
  452.         // dec = -?.0
  453.         // dec = -?0.0
  454.         // sci = (dec)[eE][+-]?[0-9]+
  455.         // hex = 0x[0-9a-fA-F]+
  456.         // oct = 0[0-7]+
  457.         //
  458.         // It is programmed as a state-engine to be very efficient, but
  459.         // correctly detect valid JSON (and what is invalid if not).

  460.         int startLine   = lineNo;
  461.         int startPos    = linePos;
  462.         int startOffset = bufferOffset;
  463.         // number (any type).
  464.         int len = 0;

  465.         if (lastChar == '-') {
  466.             // only base 10 decimals can be negative.
  467.             ++len;
  468.             if (!readNextChar()) {
  469.                 T token = numberToken(buffer, startOffset, len, startLine, startPos);
  470.                 throw failure(token, "Negative indicator without number");
  471.             }

  472.             if (!(lastChar == '.' || (lastChar >= '0' && lastChar <= '9'))) {
  473.                 T token = numberToken(buffer, startOffset, len, startLine, startPos);
  474.                 throw failure(token, "No decimal after negative indicator");
  475.             }
  476.         } else if (lastChar == '0') {
  477.             if (readNextChar()) {
  478.                 ++len;
  479.                 if (lastChar == 'x') {
  480.                     ++len;
  481.                     if (!readNextChar()) {
  482.                         T token = numberToken(buffer, startOffset, len, startLine, startPos);
  483.                         throw failure(token, "No decimal after hex indicator");
  484.                     }
  485.                     // hexadecimal.
  486.                     do {
  487.                         if (!((lastChar >= '0' && lastChar <= '9') || (lastChar >= 'a' && lastChar <= 'f') ||
  488.                               (lastChar >= 'A' && lastChar <= 'F'))) {
  489.                             // we read a char that's *not* part of the hex number.
  490.                             break;
  491.                         }
  492.                         ++len;
  493.                     } while (readNextChar());

  494.                     return validateAfterNumber(startOffset, startPos, len);
  495.                 } else if ('0' <= lastChar && lastChar <= '7') {
  496.                     ++len;
  497.                     // Octals have 0 in front, and then more digits.
  498.                     while (readNextChar()) {
  499.                         if (lastChar < '0' || lastChar > '7') {
  500.                             break;
  501.                         }
  502.                         ++len;
  503.                     }
  504.                     return validateAfterNumber(startOffset, startPos, len);
  505.                 }
  506.             } else {
  507.                 // just '0'
  508.                 return validateAfterNumber(startOffset, startPos, 1);
  509.             }
  510.         }

  511.         // decimal part.
  512.         while (lastChar >= '0' && lastChar <= '9') {
  513.             ++len;
  514.             // numbers are terminated by first non-numeric character.
  515.             if (!readNextChar()) {
  516.                 break;
  517.             }
  518.         }
  519.         // fraction part.
  520.         if (lastChar == '.') {
  521.             ++len;
  522.             // numbers are terminated by first non-numeric character.
  523.             if (readNextChar()) {
  524.                 while (lastChar >= '0' && lastChar <= '9') {
  525.                     ++len;
  526.                     // numbers are terminated by first non-numeric character.
  527.                     if (!readNextChar()) {
  528.                         break;
  529.                     }
  530.                 }
  531.             }
  532.         } else if (allowAfterInteger(lastChar) ||
  533.                    allowAfterFloatingPoint(lastChar)) {
  534.             ++len;
  535.             // ignore the actual char, but move the 'last' so we can check the char after.
  536.             readNextChar();
  537.             return validateAfterNumber(startOffset, startPos, len);
  538.         }

  539.         // exponent part.
  540.         if (lastChar == 'e' || lastChar == 'E') {
  541.             ++len;
  542.             // numbers are terminated by first non-numeric character.
  543.             if (!readNextChar()) {
  544.                 T token = numberToken(buffer, startOffset, len, startLine, startPos);
  545.                 throw failure(token,
  546.                               "Badly terminated number exponent: '%s'", token);
  547.             }

  548.             // The exponent can be explicitly prefixed with both '+'
  549.             // and '-'.
  550.             if (lastChar == '-' || lastChar == '+') {
  551.                 ++len;
  552.                 // numbers are terminated by first non-numeric character.
  553.                 if (!readNextChar()) {
  554.                     T token = numberToken(buffer, startOffset, len, startLine, startPos);
  555.                     throw failure(token, "Badly terminated number exponent: '%s'", token);
  556.                 }
  557.             }

  558.             if (lastChar >= '0' && lastChar <= '9') {
  559.                 while (lastChar >= '0' && lastChar <= '9') {
  560.                     ++len;
  561.                     // numbers are terminated by first non-numeric character.
  562.                     if (!readNextChar()) {
  563.                         break;
  564.                     }
  565.                 }
  566.             } else {
  567.                 T token = numberToken(buffer, startOffset, len + 1, startLine, startPos);
  568.                 throw failure(token, "Badly terminated number exponent: '%s'", token);
  569.             }
  570.         }

  571.         if (allowAfterFloatingPoint(lastChar)) {
  572.             ++len;
  573.             // ignore the actual char, but move the 'last' so we can check the char after.
  574.             readNextChar();
  575.         }

  576.         return validateAfterNumber(startOffset, startPos, len);
  577.     }

  578.     private T validateAfterNumber(int startOffset, int startLinePos, int len)
  579.             throws IOException {
  580.         // A number must be terminated correctly. must not immediately start identifier or string.
  581.         if (lastChar > 0 && (startIdentifier() || startString() || startNumber())) {
  582.             T token = numberToken(buffer, startOffset, len + 1, lineNo, startLinePos);
  583.             throw failure(token, "Invalid termination of number: '%s'", token);
  584.         } else {
  585.             return numberToken(buffer, startOffset, len, lineNo, startLinePos);
  586.         }
  587.     }

  588.     private T nextString(int quote) throws IOException {
  589.         maybeConsolidateBuffer();

  590.         // strings may be longer than 128 bytes. We may need to build it.
  591.         StringBuilder consolidatedString = null;

  592.         int startLine = lineNo;
  593.         int startPos = linePos;
  594.         int startOffset = bufferOffset;

  595.         boolean esc = false;
  596.         for (; ; ) {
  597.             if (!preLoaded && !bufferLineEnd && bufferOffset >= (bufferLimit - 1)) {
  598.                 if (consolidatedString == null) {
  599.                     consolidatedString = new StringBuilder();
  600.                 }
  601.                 consolidatedString.append(buffer, startOffset, bufferOffset - startOffset + 1);
  602.                 startOffset = 0;
  603.             }

  604.             if (!readNextChar()) {
  605.                 throw eofFailure("Unexpected end of stream in string");
  606.             }

  607.             if (esc) {
  608.                 esc = false;
  609.             } else if (lastChar == '\\') {
  610.                 esc = true;
  611.             } else if (lastChar == quote) {
  612.                 break;
  613.             } else if (lastChar == '\n' || lastChar == '\r') {
  614.                 T token = symbolToken(buffer, bufferOffset, 1, startLine, linePos);
  615.                 throw failure(token, "Unexpected newline in string");
  616.             } else if (lastChar < 0x20 || lastChar == 0x7f ||
  617.                        (lastChar > 0x7f && !ConsoleUtil.isConsolePrintable(lastChar))) {
  618.                 T token = symbolToken(buffer, bufferOffset, 1, startLine, linePos);
  619.                 throw failure(token, "Unescaped non-printable char in string: '%s'",
  620.                               javaEscape((char) lastChar));
  621.             }
  622.         }

  623.         lastChar = 0;
  624.         if (consolidatedString != null) {
  625.             consolidatedString.append(buffer, 0, bufferOffset + 1);
  626.             String result = consolidatedString.toString();
  627.             return stringToken(result.toCharArray(), 0, result.length(), startLine, startPos);
  628.         } else {
  629.             return stringToken(buffer, startOffset, bufferOffset - startOffset + 1, startLine, startPos);
  630.         }
  631.     }
  632. }