CharSlice.java
/*
* Copyright (c) 2016, Stein Eldar Johnsen
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package net.morimekta.strings.chr;
import static java.lang.Math.min;
/**
* A slice of a character array. This class is handy to expand on whenever
* a very large number of sub-strings are handled, so you are guaranteed
* that the underlying character array is never copied. Also:
*
* <ul>
* <li>Ordering is in a structured order of the underlying buffer, not
* of the visible string itself.
* </li>
* <li>Equality and hash is based on the assumption that you care
* about the underlying buffer more than the actual characters.
* </li>
* <li>
* The {@link #toString()} method returns the "visible" content
* of the slice, as it is was a string.
* </li>
* </ul>
* <p>
* As the characters in the buffer is not considered part of the data of
* this class, it can be considered fully immutable.
*/
public class CharSlice implements Comparable<CharSlice>, CharSequence {
/**
* Character buffer.
*/
protected final char[] fb;
/**
* Offset in buffer
*/
protected final int off;
/**
* Length of slice.
*/
protected final int len;
/**
* Create a slice instance.
*
* @param raw The raw string to wrap.
*/
public CharSlice(String raw) {
this.fb = raw.toCharArray();
this.off = 0;
this.len = fb.length;
}
/**
* Create a slice from a full character buffer.
*
* @param fb The char buffer to wrap in the slice.
*/
public CharSlice(char[] fb) {
this(fb, 0, fb.length);
}
/**
* Create a slice instance. The slice is only meant to be internal state
* immutable, and not representing an immutable byte content.
*
* @param fb The buffer to wrap.
* @param off The start offset to wrap.
* @param len The length to represent.
*/
public CharSlice(char[] fb, int off, int len) {
if (fb == null) {
throw new IllegalArgumentException("No buffer");
}
if (off < 0) {
throw new IllegalArgumentException("Negative offset: " + off);
}
if (len < 0) {
throw new IllegalArgumentException("Negative length: " + len);
}
if (off + len > fb.length) {
throw new IllegalArgumentException("Too large slice: " + off + "+" + len + " of " + fb.length);
}
this.fb = fb;
this.off = off;
this.len = len;
}
/**
* Get the char slice for the sequence.
*
* @param str Get char slice of the sequence.
* @return The slice or slice wrapping the string.
*/
public static CharSlice of(CharSequence str) {
if (str instanceof CharSlice) {
return (CharSlice) str;
}
return new CharSlice(str.toString().toCharArray());
}
/**
* Get the offset of the buffer.
*
* @return The slice offset.
*/
public int offset() {
return off;
}
/**
* Similar to subSequence, but also handles negative offset, where a negative
* offset means relative to the end. This will return the slice from the start
* position of the index, until the end. E.g. <code>subSlice(-2)</code> will
* give a slice containing the last two chars of the origin.
*
* @param start The start offset, inclusive.
* @return The resulting slice.
*/
public CharSlice subSlice(int start) {
if (start < -len || start > len) {
throw new IllegalArgumentException(
String.format("[%d:%d] of slice length %d is not valid.", start, len, len));
}
if (start < 0) {
start = len + start;
}
return subSequence(start, len);
}
/**
* Similar to subSequence, but also handles negative offset, where a negative
* offset means relative to the end.
*
* @param start The start offset, inclusive.
* @param end The end offset, exclusive.
* @return The resulting slice.
*/
public CharSlice subSlice(int start, int end) {
if (start < -len || start >= len || end < -len || end > len) {
throw new IllegalArgumentException(
String.format("[%d:%d] of slice length %d is not valid.", start, end, len));
}
if (start < 0) {
start = len + start;
}
if (end < 0) {
end = len + end;
}
if (end < start) {
throw new IllegalArgumentException(
String.format("[%d:%d] of slice length %d is not valid.", start, end, len));
}
return subSequence(start, end);
}
/**
* Get char char at the dynamic index. This handles negative indices as counting
* from the end of the slice.
*
* @param index The index of the requested char.
* @return The char at given index.
*/
public char at(int index) {
if (index < 0) {
return charAt(len + index);
}
return charAt(index);
}
// --- CharSequence ---
/**
* Get the total length of the slice.
*
* @return The slice length.
*/
@Override
public int length() {
return len;
}
/**
* Create a substring slice based on the current slice.
* <p>
* {@inheritDoc}
*
* @param start The internal start position, relative to the slice's offset, inclusive.
* @param end The internal end position, relative to the slice's offset, exclusive.
* @return The substring slice.
*/
@Override
public CharSlice subSequence(int start, int end) {
if (start < 0 || start > end || end > len) {
throw new IllegalArgumentException(
String.format("[%d:%d] of slice length %d is not valid.", start, end, len));
}
if (start == 0 && len == end) {
return this;
}
int l = end - start;
return new CharSlice(fb, off + start, l);
}
/**
* Get character at slice relative position.
*
* @param i The position to get. If negative is relative to the slice's end position.
* @return The char at given position.
*/
@Override
public char charAt(int i) {
if (i < 0 || len <= i) {
throw new IllegalArgumentException(
String.format("[%d] of slice length %d is not valid.", i, len));
}
return fb[off + i];
}
// --- Comparable ---
/**
* Compare slice with other slice as a string. This will always do
* a simple char-by-char value comparison, ignoring locale collation.
*
* @param o The other slice.
* @return Compared value.
*/
@Override
public int compareTo(CharSlice o) {
if (o.fb == fb && o.off == off) {
// comparing this way is only meaningful when using the same buffer.
return Integer.compare(len, o.len);
}
final int minLen = min(len, o.len);
for (int i = 0; i < minLen; ++i) {
char tc = fb[off + i];
char oc = o.fb[o.off + i];
if (oc != tc) {
return Character.compare(tc, oc);
}
}
return Integer.compare(len, o.len);
}
/**
* Handy method to compare slices based on the position in the main buffer.
* <p>
* Slice ordering:
* - Firstly ordered by start (offset) position.
* - Secondly ordered by reverse length (longest slice first).
* <p>
* Result is undefined of the two slices point to different byte buffers.
*
* @param o The other slice.
* @return Compared value.
*/
public int comparePosition(CharSlice o) {
// comparing position is only meaningful when using the same buffer.
if (o.off != off) {
return Integer.compare(off, o.off);
}
return Integer.compare(o.len, len);
}
// --- Object ---
@Override
public String toString() {
return new String(fb, off, len);
}
@Override
public int hashCode() {
// hashing the content of the "shown" part of the FB:
int hash = 1;
for (int i = off, l = off + len; i < l; ++i) {
hash = 31 * hash + fb[i];
}
return hash;
}
@Override
public boolean equals(Object o) {
if (o == this) {
return true;
}
if (o == null) {
return false;
}
if (!(getClass().equals(o.getClass()))) {
return false;
}
CharSlice other = (CharSlice) o;
if (len != other.len) {
return false;
}
if (fb == other.fb &&
off == other.off) {
return true;
}
for (int i = 0; i < len; ++i) {
if (charAt(i) != other.charAt(i)) {
return false;
}
}
return true;
}
}