CharSlice.java

/*
 * Copyright (c) 2016, Stein Eldar Johnsen
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package net.morimekta.strings.chr;

import static java.lang.Math.min;

/**
 * A slice of a character array. This class is handy to expand on whenever
 * a very large number of sub-strings are handled, so you are guaranteed
 * that the underlying character array is never copied. Also:
 *
 * <ul>
 *     <li>Ordering is in a structured order of the underlying buffer, not
 *         of the visible string itself.
 *     </li>
 *     <li>Equality and hash is based on the assumption that you care
 *         about the underlying buffer more than the actual characters.
 *     </li>
 *     <li>
 *         The {@link #toString()} method returns the "visible" content
 *         of the slice, as it is was a string.
 *     </li>
 * </ul>
 * <p>
 * As the characters in the buffer is not considered part of the data of
 * this class, it can be considered fully immutable.
 */
public class CharSlice implements Comparable<CharSlice>, CharSequence {
    /**
     * Character buffer.
     */
    protected final char[] fb;
    /**
     * Offset in buffer
     */
    protected final int    off;
    /**
     * Length of slice.
     */
    protected final int    len;

    /**
     * Create a slice instance.
     *
     * @param raw The raw string to wrap.
     */
    public CharSlice(String raw) {
        this.fb = raw.toCharArray();
        this.off = 0;
        this.len = fb.length;
    }

    /**
     * Create a slice from a full character buffer.
     *
     * @param fb The char buffer to wrap in the slice.
     */
    public CharSlice(char[] fb) {
        this(fb, 0, fb.length);
    }

    /**
     * Create a slice instance. The slice is only meant to be internal state
     * immutable, and not representing an immutable byte content.
     *
     * @param fb  The buffer to wrap.
     * @param off The start offset to wrap.
     * @param len The length to represent.
     */
    public CharSlice(char[] fb, int off, int len) {
        if (fb == null) {
            throw new IllegalArgumentException("No buffer");
        }
        if (off < 0) {
            throw new IllegalArgumentException("Negative offset: " + off);
        }
        if (len < 0) {
            throw new IllegalArgumentException("Negative length: " + len);
        }
        if (off + len > fb.length) {
            throw new IllegalArgumentException("Too large slice: " + off + "+" + len + " of " + fb.length);
        }
        this.fb = fb;
        this.off = off;
        this.len = len;
    }

    /**
     * Get the char slice for the sequence.
     *
     * @param str Get char slice of the sequence.
     * @return The slice or slice wrapping the string.
     */
    public static CharSlice of(CharSequence str) {
        if (str instanceof CharSlice) {
            return (CharSlice) str;
        }
        return new CharSlice(str.toString().toCharArray());
    }

    /**
     * Get the offset of the buffer.
     *
     * @return The slice offset.
     */
    public int offset() {
        return off;
    }

    /**
     * Similar to subSequence, but also handles negative offset, where a negative
     * offset means relative to the end. This will return the slice from the start
     * position of the index, until the end. E.g. <code>subSlice(-2)</code> will
     * give a slice containing the last two chars of the origin.
     *
     * @param start The start offset, inclusive.
     * @return The resulting slice.
     */
    public CharSlice subSlice(int start) {
        if (start < -len || start > len) {
            throw new IllegalArgumentException(
                    String.format("[%d:%d] of slice length %d is not valid.", start, len, len));
        }
        if (start < 0) {
            start = len + start;
        }
        return subSequence(start, len);
    }

    /**
     * Similar to subSequence, but also handles negative offset, where a negative
     * offset means relative to the end.
     *
     * @param start The start offset, inclusive.
     * @param end   The end offset, exclusive.
     * @return The resulting slice.
     */
    public CharSlice subSlice(int start, int end) {
        if (start < -len || start >= len || end < -len || end > len) {
            throw new IllegalArgumentException(
                    String.format("[%d:%d] of slice length %d is not valid.", start, end, len));
        }
        if (start < 0) {
            start = len + start;
        }
        if (end < 0) {
            end = len + end;
        }
        if (end < start) {
            throw new IllegalArgumentException(
                    String.format("[%d:%d] of slice length %d is not valid.", start, end, len));
        }
        return subSequence(start, end);
    }

    /**
     * Get char char at the dynamic index. This handles negative indices as counting
     * from the end of the slice.
     *
     * @param index The index of the requested char.
     * @return The char at given index.
     */
    public char at(int index) {
        if (index < 0) {
            return charAt(len + index);
        }
        return charAt(index);
    }

    // --- CharSequence ---

    /**
     * Get the total length of the slice.
     *
     * @return The slice length.
     */
    @Override
    public int length() {
        return len;
    }

    /**
     * Create a substring slice based on the current slice.
     * <p>
     * {@inheritDoc}
     *
     * @param start The internal start position, relative to the slice's offset, inclusive.
     * @param end   The internal end position, relative to the slice's offset, exclusive.
     * @return The substring slice.
     */
    @Override
    public CharSlice subSequence(int start, int end) {
        if (start < 0 || start > end || end > len) {
            throw new IllegalArgumentException(
                    String.format("[%d:%d] of slice length %d is not valid.", start, end, len));
        }
        if (start == 0 && len == end) {
            return this;
        }
        int l = end - start;
        return new CharSlice(fb, off + start, l);
    }

    /**
     * Get character at slice relative position.
     *
     * @param i The position to get. If negative is relative to the slice's end position.
     * @return The char at given position.
     */
    @Override
    public char charAt(int i) {
        if (i < 0 || len <= i) {
            throw new IllegalArgumentException(
                    String.format("[%d] of slice length %d is not valid.", i, len));
        }
        return fb[off + i];
    }

    // --- Comparable ---

    /**
     * Compare slice with other slice as a string. This will always do
     * a simple char-by-char value comparison, ignoring locale collation.
     *
     * @param o The other slice.
     * @return Compared value.
     */
    @Override
    public int compareTo(CharSlice o) {
        if (o.fb == fb && o.off == off) {
            // comparing this way is only meaningful when using the same buffer.
            return Integer.compare(len, o.len);
        }
        final int minLen = min(len, o.len);
        for (int i = 0; i < minLen; ++i) {
            char tc = fb[off + i];
            char oc = o.fb[o.off + i];
            if (oc != tc) {
                return Character.compare(tc, oc);
            }
        }
        return Integer.compare(len, o.len);
    }

    /**
     * Handy method to compare slices based on the position in the main buffer.
     * <p>
     * Slice ordering:
     * - Firstly ordered by start (offset) position.
     * - Secondly ordered by reverse length (longest slice first).
     * <p>
     * Result is undefined of the two slices point to different byte buffers.
     *
     * @param o The other slice.
     * @return Compared value.
     */
    public int comparePosition(CharSlice o) {
        // comparing position is only meaningful when using the same buffer.
        if (o.off != off) {
            return Integer.compare(off, o.off);
        }
        return Integer.compare(o.len, len);
    }

    // --- Object ---

    @Override
    public String toString() {
        return new String(fb, off, len);
    }

    @Override
    public int hashCode() {
        // hashing the content of the "shown" part of the FB:
        int hash = 1;
        for (int i = off, l = off + len; i < l; ++i) {
            hash = 31 * hash + fb[i];
        }
        return hash;
    }

    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (o == null) {
            return false;
        }
        if (!(getClass().equals(o.getClass()))) {
            return false;
        }
        CharSlice other = (CharSlice) o;
        if (len != other.len) {
            return false;
        }
        if (fb == other.fb &&
            off == other.off) {
            return true;
        }
        for (int i = 0; i < len; ++i) {
            if (charAt(i) != other.charAt(i)) {
                return false;
            }
        }
        return true;
    }
}