DiffStringUtil.java
- /*
- * Copyright (c) 2020, Stein Eldar Johnsen
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
- package net.morimekta.strings.diff;
- import net.morimekta.strings.chr.CharSlice;
- import java.util.LinkedList;
- import java.util.List;
- /**
- * Utility for diff related to string and char sequence handling. This utility
- * class should not contain any "true" diff functions, as those belong in
- * {@link PatchUtil} and {@code DiffUtil} classes.
- */
- public final class DiffStringUtil {
- /**
- * Split lines of a {@link CharSlice} so that it separates new lines
- * with '\n' and '\r\n' as a single new line each.
- *
- * @param slice The slice to split in lines.
- * @return Linked list of lines.
- */
- public static List<CharSlice> splitLines(CharSlice slice) {
- LinkedList<CharSlice> out = new LinkedList<>();
- int start = 0;
- for (int i = 0; i < slice.length(); ++i) {
- if (slice.charAt(i) == '\n') {
- out.add(slice.subSlice(start, i));
- start = i + 1;
- } else if (slice.charAt(i) == '\r') {
- out.add(slice.subSlice(start, i));
- start = i + 1;
- // skip '\r\n' as if a single newline.
- if (i + 1 < slice.length() && slice.charAt(i + 1) == '\n') {
- ++i;
- ++start;
- }
- }
- }
- // No newline at end of file
- if (start < slice.length()) {
- out.add(slice.subSlice(start));
- }
- return out;
- }
- /*
- * The following functions are copied from the java version of
- * http://code.google.com/p/google-diff-match-patch/
- *
- * Copyright 2006 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- /**
- * Determine the common prefix of two strings
- * @param text1 First string.
- * @param text2 Second string.
- * @return The number of characters common to the start of each string.
- */
- public static int commonPrefix(CharSequence text1, CharSequence text2) {
- // Performance analysis: http://neil.fraser.name/news/2007/10/09/
- int n = Math.min(text1.length(), text2.length());
- for (int i = 0; i < n; i++) {
- if (text1.charAt(i) != text2.charAt(i)) {
- return i;
- }
- }
- return n;
- }
- /**
- * Determine the common suffix of two strings
- * @param text1 First string.
- * @param text2 Second string.
- * @return The number of characters common to the end of each string.
- */
- public static int commonSuffix(CharSequence text1, CharSequence text2) {
- // Performance analysis: http://neil.fraser.name/news/2007/10/09/
- int text1_length = text1.length();
- int text2_length = text2.length();
- int n = Math.min(text1_length, text2_length);
- for (int i = 1; i <= n; i++) {
- if (text1.charAt(text1_length - i) != text2.charAt(text2_length - i)) {
- return i - 1;
- }
- }
- return n;
- }
- /**
- * Determine if the suffix of one string is the prefix of another.
- *
- * @param text1 First string.
- * @param text2 Second string.
- * @return The number of characters common to the end of the first
- * string and the start of the second string.
- */
- public static int commonOverlap(CharSequence text1, CharSequence text2) {
- // Cache the text lengths to prevent multiple calls.
- int text1_length = text1.length();
- int text2_length = text2.length();
- // Eliminate the null case.
- if (text1_length == 0 || text2_length == 0) {
- return 0;
- }
- // Truncate the longer string.
- if (text1_length > text2_length) {
- text1 = text1.subSequence(text1_length - text2_length, text1_length);
- text1_length = text2_length;
- } else if (text1_length < text2_length) {
- text2 = text2.subSequence(0, text1_length);
- }
- // Quick check for the worst case.
- if (text1.equals(text2)) {
- return text1_length;
- }
- int best = 0;
- search: for (int pos = text1_length - 1; pos >= 0; --pos) {
- final int pos_l = text1_length - pos;
- for (int i = 0; i < pos_l; ++i) {
- if (text1.charAt(pos + i) != text2.charAt(i)) {
- continue search;
- }
- }
- best = pos_l;
- }
- return best;
- }
- private DiffStringUtil() {}
- }