DiffStringUtil.java
/*
* Copyright (c) 2020, Stein Eldar Johnsen
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package net.morimekta.strings.diff;
import net.morimekta.strings.chr.CharSlice;
import java.util.LinkedList;
import java.util.List;
/**
* Utility for diff related to string and char sequence handling. This utility
* class should not contain any "true" diff functions, as those belong in
* {@link PatchUtil} and {@code DiffUtil} classes.
*/
public final class DiffStringUtil {
/**
* Split lines of a {@link CharSlice} so that it separates new lines
* with '\n' and '\r\n' as a single new line each.
*
* @param slice The slice to split in lines.
* @return Linked list of lines.
*/
public static List<CharSlice> splitLines(CharSlice slice) {
LinkedList<CharSlice> out = new LinkedList<>();
int start = 0;
for (int i = 0; i < slice.length(); ++i) {
if (slice.charAt(i) == '\n') {
out.add(slice.subSlice(start, i));
start = i + 1;
} else if (slice.charAt(i) == '\r') {
out.add(slice.subSlice(start, i));
start = i + 1;
// skip '\r\n' as if a single newline.
if (i + 1 < slice.length() && slice.charAt(i + 1) == '\n') {
++i;
++start;
}
}
}
// No newline at end of file
if (start < slice.length()) {
out.add(slice.subSlice(start));
}
return out;
}
/*
* The following functions are copied from the java version of
* http://code.google.com/p/google-diff-match-patch/
*
* Copyright 2006 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Determine the common prefix of two strings
* @param text1 First string.
* @param text2 Second string.
* @return The number of characters common to the start of each string.
*/
public static int commonPrefix(CharSequence text1, CharSequence text2) {
// Performance analysis: http://neil.fraser.name/news/2007/10/09/
int n = Math.min(text1.length(), text2.length());
for (int i = 0; i < n; i++) {
if (text1.charAt(i) != text2.charAt(i)) {
return i;
}
}
return n;
}
/**
* Determine the common suffix of two strings
* @param text1 First string.
* @param text2 Second string.
* @return The number of characters common to the end of each string.
*/
public static int commonSuffix(CharSequence text1, CharSequence text2) {
// Performance analysis: http://neil.fraser.name/news/2007/10/09/
int text1_length = text1.length();
int text2_length = text2.length();
int n = Math.min(text1_length, text2_length);
for (int i = 1; i <= n; i++) {
if (text1.charAt(text1_length - i) != text2.charAt(text2_length - i)) {
return i - 1;
}
}
return n;
}
/**
* Determine if the suffix of one string is the prefix of another.
*
* @param text1 First string.
* @param text2 Second string.
* @return The number of characters common to the end of the first
* string and the start of the second string.
*/
public static int commonOverlap(CharSequence text1, CharSequence text2) {
// Cache the text lengths to prevent multiple calls.
int text1_length = text1.length();
int text2_length = text2.length();
// Eliminate the null case.
if (text1_length == 0 || text2_length == 0) {
return 0;
}
// Truncate the longer string.
if (text1_length > text2_length) {
text1 = text1.subSequence(text1_length - text2_length, text1_length);
text1_length = text2_length;
} else if (text1_length < text2_length) {
text2 = text2.subSequence(0, text1_length);
}
// Quick check for the worst case.
if (text1.equals(text2)) {
return text1_length;
}
int best = 0;
search: for (int pos = text1_length - 1; pos >= 0; --pos) {
final int pos_l = text1_length - pos;
for (int i = 0; i < pos_l; ++i) {
if (text1.charAt(pos + i) != text2.charAt(i)) {
continue search;
}
}
best = pos_l;
}
return best;
}
private DiffStringUtil() {}
}