DiffStringUtil.java

  1. /*
  2.  * Copyright (c) 2020, Stein Eldar Johnsen
  3.  *
  4.  * Licensed to the Apache Software Foundation (ASF) under one
  5.  * or more contributor license agreements. See the NOTICE file
  6.  * distributed with this work for additional information
  7.  * regarding copyright ownership. The ASF licenses this file
  8.  * to you under the Apache License, Version 2.0 (the
  9.  * "License"); you may not use this file except in compliance
  10.  * with the License. You may obtain a copy of the License at
  11.  *
  12.  *   http://www.apache.org/licenses/LICENSE-2.0
  13.  *
  14.  * Unless required by applicable law or agreed to in writing,
  15.  * software distributed under the License is distributed on an
  16.  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  17.  * KIND, either express or implied. See the License for the
  18.  * specific language governing permissions and limitations
  19.  * under the License.
  20.  */
  21. package net.morimekta.strings.diff;

  22. import net.morimekta.strings.chr.CharSlice;

  23. import java.util.LinkedList;
  24. import java.util.List;

  25. /**
  26.  * Utility for diff related to string and char sequence handling. This utility
  27.  * class should not contain any "true" diff functions, as those belong in
  28.  * {@link PatchUtil} and {@code DiffUtil} classes.
  29.  */
  30. public final class DiffStringUtil {
  31.     /**
  32.      * Split lines of a {@link CharSlice} so that it separates new lines
  33.      * with '\n' and '\r\n' as a single new line each.
  34.      *
  35.      * @param slice The slice to split in lines.
  36.      * @return Linked list of lines.
  37.      */
  38.     public static List<CharSlice> splitLines(CharSlice slice) {
  39.         LinkedList<CharSlice> out = new LinkedList<>();
  40.         int start = 0;
  41.         for (int i = 0; i < slice.length(); ++i) {
  42.             if (slice.charAt(i) == '\n') {
  43.                 out.add(slice.subSlice(start, i));
  44.                 start = i + 1;
  45.             } else if (slice.charAt(i) == '\r') {
  46.                 out.add(slice.subSlice(start, i));
  47.                 start = i + 1;
  48.                 // skip '\r\n' as if a single newline.
  49.                 if (i + 1 < slice.length() && slice.charAt(i + 1) == '\n') {
  50.                     ++i;
  51.                     ++start;
  52.                 }
  53.             }
  54.         }
  55.         // No newline at end of file
  56.         if (start < slice.length()) {
  57.             out.add(slice.subSlice(start));
  58.         }
  59.         return out;
  60.     }

  61.     /*
  62.      * The following functions are copied from the java version of
  63.      * http://code.google.com/p/google-diff-match-patch/
  64.      *
  65.      * Copyright 2006 Google Inc.
  66.      *
  67.      * Licensed under the Apache License, Version 2.0 (the "License");
  68.      * you may not use this file except in compliance with the License.
  69.      * You may obtain a copy of the License at
  70.      *
  71.      *   http://www.apache.org/licenses/LICENSE-2.0
  72.      *
  73.      * Unless required by applicable law or agreed to in writing, software
  74.      * distributed under the License is distributed on an "AS IS" BASIS,
  75.      * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  76.      * See the License for the specific language governing permissions and
  77.      * limitations under the License.
  78.      */

  79.     /**
  80.      * Determine the common prefix of two strings
  81.      * @param text1 First string.
  82.      * @param text2 Second string.
  83.      * @return The number of characters common to the start of each string.
  84.      */
  85.     public static int commonPrefix(CharSequence text1, CharSequence text2) {
  86.         // Performance analysis: http://neil.fraser.name/news/2007/10/09/
  87.         int n = Math.min(text1.length(), text2.length());
  88.         for (int i = 0; i < n; i++) {
  89.             if (text1.charAt(i) != text2.charAt(i)) {
  90.                 return i;
  91.             }
  92.         }
  93.         return n;
  94.     }

  95.     /**
  96.      * Determine the common suffix of two strings
  97.      * @param text1 First string.
  98.      * @param text2 Second string.
  99.      * @return The number of characters common to the end of each string.
  100.      */
  101.     public static int commonSuffix(CharSequence text1, CharSequence text2) {
  102.         // Performance analysis: http://neil.fraser.name/news/2007/10/09/
  103.         int text1_length = text1.length();
  104.         int text2_length = text2.length();
  105.         int n = Math.min(text1_length, text2_length);
  106.         for (int i = 1; i <= n; i++) {
  107.             if (text1.charAt(text1_length - i) != text2.charAt(text2_length - i)) {
  108.                 return i - 1;
  109.             }
  110.         }
  111.         return n;
  112.     }

  113.     /**
  114.      * Determine if the suffix of one string is the prefix of another.
  115.      *
  116.      * @param text1 First string.
  117.      * @param text2 Second string.
  118.      * @return The number of characters common to the end of the first
  119.      *         string and the start of the second string.
  120.      */
  121.     public static int commonOverlap(CharSequence text1, CharSequence text2) {
  122.         // Cache the text lengths to prevent multiple calls.
  123.         int text1_length = text1.length();
  124.         int text2_length = text2.length();
  125.         // Eliminate the null case.
  126.         if (text1_length == 0 || text2_length == 0) {
  127.             return 0;
  128.         }
  129.         // Truncate the longer string.
  130.         if (text1_length > text2_length) {
  131.             text1 = text1.subSequence(text1_length - text2_length, text1_length);
  132.             text1_length = text2_length;
  133.         } else if (text1_length < text2_length) {
  134.             text2 = text2.subSequence(0, text1_length);
  135.         }
  136.         // Quick check for the worst case.
  137.         if (text1.equals(text2)) {
  138.             return text1_length;
  139.         }

  140.         int best = 0;
  141.         search: for (int pos = text1_length - 1; pos >= 0; --pos) {
  142.             final int pos_l = text1_length - pos;
  143.             for (int i = 0; i < pos_l; ++i) {
  144.                 if (text1.charAt(pos + i) != text2.charAt(i)) {
  145.                     continue search;
  146.                 }
  147.             }
  148.             best = pos_l;
  149.         }
  150.         return best;
  151.     }

  152.     private DiffStringUtil() {}
  153. }