PatchUtil.java

/*
 * Copyright (c) 2020, Stein Eldar Johnsen
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package net.morimekta.strings.diff;

import net.morimekta.strings.chr.CharSlice;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static net.morimekta.strings.diff.DiffStringUtil.splitLines;

public final class PatchUtil {
    /**
     * Diff a pair of {@link CharSlice} text content per line, as diff or patch would
     * have it.
     *
     * @param source The source slice.
     * @param target The target slice.
     * @return The list of changes and non-changes in order.
     */
    public static List<Change> diffLines(CharSlice source, CharSlice target) {
        LineMunged munged = makeLineMunged(splitLines(source), splitLines(target));

        int s_start = 0;
        int s_end   = munged.source.length;
        int t_start = 0;
        int t_end   = munged.target.length;

        List<Change> beg = new ArrayList<>(munged.lines.length);
        List<Change> end = new ArrayList<>();

        while (s_start < s_end && t_start < t_end) {
            int src_first = munged.source[s_start];
            int trg_first = munged.target[t_start];

            if (src_first == trg_first) {
                beg.add(new Change(Operation.EQUAL, munged.lines[src_first]));
                ++s_start;
                ++t_start;
                continue;
            }

            int src_last = munged.source[s_end - 1];
            int trg_last = munged.target[t_end - 1];
            if (src_last == trg_last) {
                end.add(new Change(Operation.EQUAL, munged.lines[src_last]));
                --s_end;
                --t_end;
                continue;
            }

            int up_next = indexOf(munged.source, s_start, s_end, trg_first);
            int down_next = indexOf(munged.target, t_start, t_end, src_first);
            if (up_next == -1 && down_next >= 0) {
                beg.add(new Change(Operation.INSERT, munged.lines[trg_first]));
                ++t_start;
                continue;
            }
            if (down_next == -1 && up_next >= 0) {
                beg.add(new Change(Operation.DELETE, munged.lines[src_first]));
                ++s_start;
                continue;
            }

            if (up_next >= 0) {
                // Check number of lines moved **UP** (top in target found in source)
                int up_move = 1;
                while (up_next + up_move + s_start < s_end &&
                       up_move < (s_end - s_start)) {
                    if (munged.source[s_start + up_next + up_move] ==
                        munged.target[t_start + up_move]) {
                        ++up_move;
                    } else {
                        break;
                    }
                }

                // Check number of lines moved **DOWN** (top in source found in target)
                int down_move = 1;
                while (down_next + down_move + t_start < t_end &&
                       down_move < (t_end - t_start)) {
                    if (munged.target[t_start + down_next + down_move] ==
                        munged.source[s_start + down_move]) {
                        ++down_move;
                    } else {
                        break;
                    }
                }

                // First choose the shorter consecutive diff.
                if (up_move > down_move) {
                    up_move = 0;
                } else if (up_move < down_move) {
                    down_move = 0;
                } else {
                    // Then the closest diff.
                    if (up_next > down_next) {
                        up_move = 0;
                    } else {
                        down_move = 0;
                    }
                }

                if (down_move > 0) {
                    ++s_start;
                    --down_move;
                    beg.add(new Change(Operation.DELETE, munged.lines[src_first]));
                    while (down_move-- > 0) {
                        src_first = munged.source[s_start++];
                        beg.add(new Change(Operation.DELETE, munged.lines[src_first]));
                    }
                } else {
                    ++t_start;
                    --up_move;
                    beg.add(new Change(Operation.INSERT, munged.lines[trg_first]));
                    while (up_move-- > 0) {
                        trg_first = munged.target[t_start++];
                        beg.add(new Change(Operation.INSERT, munged.lines[trg_first]));
                    }
                }
                continue;
            }

            // added AND removed, aka change.
            beg.add(new Change(Operation.DELETE, munged.lines[src_first]));
            beg.add(new Change(Operation.INSERT, munged.lines[trg_first]));
            ++s_start;
            ++t_start;
        }

        while (s_start < s_end) {
            beg.add(new Change(Operation.DELETE, munged.lines[munged.source[s_start++]]));
        }
        while (t_start < t_end) {
            beg.add(new Change(Operation.INSERT, munged.lines[munged.target[t_start++]]));
        }
        ArrayList<Change> changes = new ArrayList<>(beg.size() + end.size());
        changes.addAll(beg);
        Collections.reverse(end);
        changes.addAll(end);

        // Sort diff lines so that a continuous change of inserts and deletes becomes:
        // -- all deleted
        // -- all inserts
        ArrayList<Change> result = new ArrayList<>(changes.size());
        ArrayList<Change> inserts = new ArrayList<>(changes.size() / 2);
        for (Change change : changes) {
            switch (change.operation) {
                case EQUAL: {
                    result.addAll(inserts);
                    inserts.clear();
                    result.add(change);
                    break;
                }
                case DELETE: {
                    result.add(change);
                    break;
                }
                case INSERT: {
                    inserts.add(change);
                    break;
                }
            }
        }
        result.addAll(inserts);
        return result;
    }

    /**
     * Diff a pair of text content per line, as diff or patch would
     * have it.
     *
     * @param source The source slice.
     * @param target The target slice.
     * @return The list of changes and non-changes in order.
     */
    public static List<Change> diffLines(CharSequence source, CharSequence target) {
        requireNonNull(source, "source == null");
        requireNonNull(target, "target == null");
        return diffLines(CharSlice.of(source), CharSlice.of(target));
    }

    /**
     * Make a simple patch string. This creates a minimal 'patch' string that can
     * be used with the unix {@code patch} command to update the source file
     * in order to get the target output.
     * <p>
     * Example patch output can be like this:
     *
     * <pre>{@code
     *
     * }</pre>
     *
     * @param changesIn Changes to be displayed.
     * @return The patch string.
     */
    public static String makePatchString(final List<Change> changesIn) {
        try {
            StringBuilder builder = new StringBuilder();
            writePatchString(changesIn, builder);
            return builder.toString();
        } catch (IOException e) {
            throw new UncheckedIOException(e.getMessage(), e);
        }
    }

    /**
     * Write a simple patch string. This creates a minimal 'patch' string that can
     * be used with the unix {@code patch} command to update the source file
     * in order to get the target output.
     * <p>
     * Example patch output can be like this:
     *
     * <pre>{@code
     *
     * }</pre>
     *
     * @param changesIn Changes to be displayed.
     * @param builder   The builder to append string to.
     */
    public static void writePatchString(final List<Change> changesIn,
                                        final Appendable builder) throws IOException {
        requireNonNull(changesIn, "changesIn == null");

        int src_pos = 1, trg_pos = 1;
        LinkedList<Change> changes = new LinkedList<>(changesIn);
        while (!changes.isEmpty()) {
            if (changes.peekFirst().operation == Operation.EQUAL) {
                changes.pollFirst();
                ++src_pos;
                ++trg_pos;
                continue;
            }
            LinkedList<Change> upcoming = new LinkedList<>();

            int ins = 0;
            int rms = 0;
            while (!changes.isEmpty()) {
                if (changes.peekFirst().operation == Operation.EQUAL) {
                    break;
                }
                Change ch = changes.pollFirst();
                upcoming.add(ch);
                if (ch.operation == Operation.INSERT) {
                    ++ins;
                } else {
                    ++rms;
                }
            }

            builder.append(format("@@ -%d,%d +%d,%d @@%n", src_pos, rms, trg_pos, ins));

            for (Change ch : upcoming) {
                builder.append(ch.operation.patch)
                       .append(ch.text)
                       .append('\n');
                if (ch.operation == Operation.INSERT) {
                    ++trg_pos;
                } else {
                    ++src_pos;
                }
            }
        }
    }

    /**
     * Make a display-patch output with extra lines before and after each change.
     *
     * @param changesIn List of changes to be displayed.
     * @param options   Options for what and hot to display the patch.
     */
    public static String makeDisplayPatchString(
            List<Change> changesIn,
            PatchOptions options) {
        try {
            StringBuilder builder = new StringBuilder();
            writeDisplayPatchString(changesIn, builder, options);
            return builder.toString();
        } catch (IOException e) {
            // Should be impossible to reach.
            throw new UncheckedIOException(e.getMessage(), e);
        }
    }

    /**
     * Make a display-patch output with extra lines before and after each change.
     *
     * @param changesIn List of changes to be displayed.
     * @param options   Options for what and hot to display the patch.
     */
    public static void writeDisplayPatchString(
            final List<Change> changesIn,
            final Appendable builder,
            final PatchOptions options) throws IOException {
        requireNonNull(changesIn, "changedIn == null");
        requireNonNull(options, "options == null");

        int skippedLines = -1;
        int sourceLineNo = 0;
        int targetLineNo = 0;

        ArrayList<Change> changes = new ArrayList<>(changesIn);
        boolean printBefore = false;
        int printAfter = 0;
        for (int i = 0; i <= options.before && i < changes.size(); ++i) {
            if (changes.get(i).operation != Operation.EQUAL) {
                printBefore = true;
                break;
            }
        }

        for (int i = 0; i < changes.size(); ++i) {
            Change ch = changes.get(i);
            switch (ch.operation) {
                case EQUAL: {
                    ++sourceLineNo;
                    ++targetLineNo;
                    {
                        if (!printBefore && printAfter < 1) {
                            // we're not currently printing anything...
                            if (options.before > 0) {
                                // check set of 'before' lines after current to check for changes.
                                for (int off = 0; off < options.before && i + off + 1 < changes.size(); ++off) {
                                    if (changes.get(i + off + 1).operation != Operation.EQUAL) {
                                        printBefore = true;
                                        break;
                                    }
                                }
                            }
                            // If not, check if we might be skipping 1 line.
                            if (!printBefore) {
                                if ((options.before > 0 || options.after > 0) &&
                                    i > options.after &&
                                    i < (changes.size() - options.before - 1) &&
                                    changes.get(i - options.after - 1).operation != Operation.EQUAL &&
                                    changes.get(i + options.before + 1).operation != Operation.EQUAL) {
                                    // never skip 1 line if showing before or after line is enabled.
                                    printBefore = true;
                                }
                            }
                        }

                        if (printBefore ||
                            printAfter-- > 0) {
                            if (skippedLines != 0) {
                                appendPatchLine(options.beforePatch,
                                                options.afterPatch,
                                                options.beforeComment,
                                                options.afterComment,
                                                builder,
                                                skippedLines,
                                                sourceLineNo,
                                                targetLineNo);
                                skippedLines = 0;
                            }

                            builder.append(options.beforeEq)
                                   .append(" ")
                                   .append(ch.text)
                                   .append(options.afterEq)
                                   .append('\n');
                        } else {
                            // Skip this line.
                            if (skippedLines < 1) {
                                // First line skipped, go from -1 to 1.
                                skippedLines = 1;
                            } else {
                                ++skippedLines;
                            }
                        }
                    }
                    break;
                }
                case DELETE: {
                    ++sourceLineNo;
                    if (skippedLines != 0) {
                        appendPatchLine(options.beforePatch,
                                        options.afterPatch,
                                        options.beforeComment,
                                        options.afterComment,
                                        builder,
                                        skippedLines,
                                        sourceLineNo,
                                        targetLineNo);
                        skippedLines = 0;
                    }
                    builder.append(options.beforeDel)
                           .append("-")
                           .append(ch.text)
                           .append(options.afterChange)
                           .append('\n');
                    printAfter = options.after;
                    printBefore = false;
                    break;
                }
                case INSERT: {
                    ++targetLineNo;
                    if (skippedLines != 0) {
                        appendPatchLine(options.beforePatch,
                                        options.afterPatch,
                                        options.beforeComment,
                                        options.afterComment,
                                        builder,
                                        skippedLines,
                                        sourceLineNo,
                                        targetLineNo);
                        skippedLines = 0;
                    }
                    builder.append(options.beforeAdd)
                           .append("+")
                           .append(ch.text)
                           .append(options.afterChange)
                           .append('\n');
                    printAfter = options.after;
                    printBefore = false;
                    break;
                }
            }
        }
    }

    private static void appendPatchLine(
            String beforePatch,
            String afterPatch,
            String beforeComment,
            String afterComment,
            Appendable builder,
            int skippedLines,
            int sourceLineNo,
            int targetLineNo) throws IOException {
        builder.append(format("%s@@ -%d +%d @@%s", beforePatch, sourceLineNo, targetLineNo, afterPatch));
        if (skippedLines > 0) {
            builder.append(format("%s -- (skipped %d lines)%s", beforeComment, skippedLines, afterComment));
        }
        builder.append('\n');
    }

    private static class LineMunged {
        private final int[]       source;
        private final int[]       target;
        private final CharSlice[] lines;

        private LineMunged(int[] source, int[] target, CharSlice[] lines) {
            this.source = source;
            this.target = target;
            this.lines = lines;
        }
    }

    private static LineMunged makeLineMunged(List<CharSlice> sourceLines, List<CharSlice> targetLines) {
        int[]                   source = new int[sourceLines.size()];
        int[]                   target = new int[targetLines.size()];
        CharSlice[]             lines  = new CharSlice[source.length + target.length];
        AtomicInteger           idx    = new AtomicInteger();
        Map<CharSlice, Integer> tmp    = new HashMap<>(lines.length);
        int                     li     = 0;
        int                     si     = 0;
        for (CharSlice src : sourceLines) {
            int i = tmp.computeIfAbsent(src, s -> idx.getAndIncrement());
            source[si++] = i;
            if (i == li) {
                lines[li++] = src;
            }
        }
        int ti = 0;
        for (CharSlice trg : targetLines) {
            int i = tmp.computeIfAbsent(trg, s -> idx.getAndIncrement());
            target[ti++] = i;
            if (i == li) {
                lines[li++] = trg;
            }
        }
        return new LineMunged(source, target, lines);
    }

    private static int indexOf(int[] arr, int startOffset, int endOffset, int i) {
        int len = endOffset - startOffset;
        for (int x = 0; x < len; ++x) {
            if (arr[startOffset + x] == i) return x;
        }
        return -1;
    }

    private PatchUtil() {}
}