DiffStringUtil.java

/*
 * Copyright (c) 2020, Stein Eldar Johnsen
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package net.morimekta.strings.diff;

import net.morimekta.strings.chr.CharSlice;

import java.util.LinkedList;
import java.util.List;

/**
 * Utility for diff related to string and char sequence handling. This utility
 * class should not contain any "true" diff functions, as those belong in
 * {@link PatchUtil} and {@code DiffUtil} classes.
 */
public final class DiffStringUtil {
    /**
     * Split lines of a {@link CharSlice} so that it separates new lines
     * with '\n' and '\r\n' as a single new line each.
     *
     * @param slice The slice to split in lines.
     * @return Linked list of lines.
     */
    public static List<CharSlice> splitLines(CharSlice slice) {
        LinkedList<CharSlice> out = new LinkedList<>();
        int start = 0;
        for (int i = 0; i < slice.length(); ++i) {
            if (slice.charAt(i) == '\n') {
                out.add(slice.subSlice(start, i));
                start = i + 1;
            } else if (slice.charAt(i) == '\r') {
                out.add(slice.subSlice(start, i));
                start = i + 1;
                // skip '\r\n' as if a single newline.
                if (i + 1 < slice.length() && slice.charAt(i + 1) == '\n') {
                    ++i;
                    ++start;
                }
            }
        }
        // No newline at end of file
        if (start < slice.length()) {
            out.add(slice.subSlice(start));
        }
        return out;
    }

    /*
     * The following functions are copied from the java version of
     * http://code.google.com/p/google-diff-match-patch/
     *
     * Copyright 2006 Google Inc.
     *
     * Licensed under the Apache License, Version 2.0 (the "License");
     * you may not use this file except in compliance with the License.
     * You may obtain a copy of the License at
     *
     *   http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /**
     * Determine the common prefix of two strings
     * @param text1 First string.
     * @param text2 Second string.
     * @return The number of characters common to the start of each string.
     */
    public static int commonPrefix(CharSequence text1, CharSequence text2) {
        // Performance analysis: http://neil.fraser.name/news/2007/10/09/
        int n = Math.min(text1.length(), text2.length());
        for (int i = 0; i < n; i++) {
            if (text1.charAt(i) != text2.charAt(i)) {
                return i;
            }
        }
        return n;
    }

    /**
     * Determine the common suffix of two strings
     * @param text1 First string.
     * @param text2 Second string.
     * @return The number of characters common to the end of each string.
     */
    public static int commonSuffix(CharSequence text1, CharSequence text2) {
        // Performance analysis: http://neil.fraser.name/news/2007/10/09/
        int text1_length = text1.length();
        int text2_length = text2.length();
        int n = Math.min(text1_length, text2_length);
        for (int i = 1; i <= n; i++) {
            if (text1.charAt(text1_length - i) != text2.charAt(text2_length - i)) {
                return i - 1;
            }
        }
        return n;
    }

    /**
     * Determine if the suffix of one string is the prefix of another.
     *
     * @param text1 First string.
     * @param text2 Second string.
     * @return The number of characters common to the end of the first
     *         string and the start of the second string.
     */
    public static int commonOverlap(CharSequence text1, CharSequence text2) {
        // Cache the text lengths to prevent multiple calls.
        int text1_length = text1.length();
        int text2_length = text2.length();
        // Eliminate the null case.
        if (text1_length == 0 || text2_length == 0) {
            return 0;
        }
        // Truncate the longer string.
        if (text1_length > text2_length) {
            text1 = text1.subSequence(text1_length - text2_length, text1_length);
            text1_length = text2_length;
        } else if (text1_length < text2_length) {
            text2 = text2.subSequence(0, text1_length);
        }
        // Quick check for the worst case.
        if (text1.equals(text2)) {
            return text1_length;
        }

        int best = 0;
        search: for (int pos = text1_length - 1; pos >= 0; --pos) {
            final int pos_l = text1_length - pos;
            for (int i = 0; i < pos_l; ++i) {
                if (text1.charAt(pos + i) != text2.charAt(i)) {
                    continue search;
                }
            }
            best = pos_l;
        }
        return best;
    }

    private DiffStringUtil() {}
}