StringUtil.java

/*
 * Copyright (c) 2020, Stein Eldar Johnsen
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package net.morimekta.strings;

import net.morimekta.strings.chr.Char;
import net.morimekta.strings.chr.CharStream;
import net.morimekta.strings.chr.Unicode;
import net.morimekta.strings.diff.DiffStringUtil;

import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;

import static java.util.Objects.requireNonNull;
import static net.morimekta.strings.ConsoleUtil.isConsolePrintable;

/**
 * Utilities for inspecting properties of strings and for modifying them.
 */
public final class StringUtil {
    /**
     * Tell whether a character sequence carries no content at all.
     *
     * @param str The sequence to inspect, may be null.
     * @return True when {@code str} is null or has zero length.
     */
    public static boolean isNullOrEmpty(CharSequence str) {
        return str == null || str.length() == 0;
    }

    /**
     * Tell whether a character sequence has at least one char.
     *
     * @param str The sequence to inspect, may be null.
     * @return True when {@code str} is non-null and its length is above zero.
     */
    public static boolean isNotEmpty(CharSequence str) {
        return str != null && str.length() > 0;
    }

    /**
     * Normalize "no content" to null: both a null and a zero-length
     * sequence yield null, anything else is converted with
     * {@code toString()}.
     *
     * @param str The sequence to normalize, may be null.
     * @return The string value if there is content, otherwise null.
     */
    public static String emptyToNull(CharSequence str) {
        final boolean hasContent = str != null && str.length() != 0;
        return hasContent ? str.toString() : null;
    }

    /**
     * Capitalize the string by upper-casing its first 16-bit char and
     * leaving the rest untouched. This assumes the first 16-bit char
     * can be upper-cased alone.
     * <p>
     * An empty input has no first char to capitalize and is returned as
     * the empty string.
     *
     * @param s The source string.
     * @return The capitalized string, or the empty string if {@code s}
     *         is empty.
     * @throws NullPointerException If {@code s} is null.
     */
    public static String capitalize(CharSequence s) {
        requireNonNull(s, "s == null");
        if (s.length() == 0) {
            // charAt(0) would be out of bounds; there is nothing to capitalize.
            return "";
        }
        return Character.toUpperCase(s.charAt(0)) +
               s.subSequence(1, s.length()).toString();
    }

    /**
     * Upper-case the whole string with the rules of {@link Locale#US},
     * independent of the default locale of the running JVM.
     *
     * @param s The source string.
     * @return The upper-cased string.
     * @throws NullPointerException If {@code s} is null.
     */
    public static String toUpperCase(CharSequence s) {
        final String source = requireNonNull(s, "s == null").toString();
        return source.toUpperCase(Locale.US);
    }

    /**
     * Lower-case the whole string with the rules of {@link Locale#US},
     * independent of the default locale of the running JVM.
     *
     * @param s The source string.
     * @return The lower-cased string.
     * @throws NullPointerException If {@code s} is null.
     */
    public static String toLowerCase(CharSequence s) {
        final String source = requireNonNull(s, "s == null").toString();
        return source.toLowerCase(Locale.US);
    }

    /**
     * Build a string of plain spaces that is as wide, in printable width,
     * as the source string. Useful for aligning follow-up lines below a
     * printed value.
     *
     * @param s The source string.
     * @return A string of spaces, one per unit of printable width of
     *         {@code s}.
     */
    public static String toWhitespace(CharSequence s) {
        final int columns = printableWidth(s);
        return " ".repeat(columns);
    }

    /**
     * Measure how many single-character cells of console real-estate the
     * string takes up when printed. The result is the sum of the printable
     * width of every char in the string; control characters will be
     * ignored, and double-width characters (CJK) will count as 2 each.
     * <p>
     * Strings containing carriage movement, CR, LF, unexpanded tabs etc are
     * not allowed, and will cause an IllegalArgumentException.
     *
     * @param string The string to measure.
     * @return The printed width.
     * @throws NullPointerException If {@code string} is null.
     */
    public static int printableWidth(CharSequence string) {
        requireNonNull(string, "string == null");
        return CharStream.stream(string)
                         .mapToInt(ch -> ch.printableWidth())
                         .sum();
    }

    /**
     * Default tab width in number of spaces. Used by
     * {@link #expandTabs(CharSequence)} when no explicit width is given.
     */
    public static final int TAB_WIDTH = 4;

    /**
     * Expand tabs in string using the default {@link #TAB_WIDTH}.
     * Delegates to {@link #expandTabs(CharSequence, int)}.
     *
     * @param string The string to expand.
     * @return The expanded string.
     */
    public static String expandTabs(CharSequence string) {
        return expandTabs(string, TAB_WIDTH);
    }

    /**
     * Expand tabs in string, assuming the string starts at column 0.
     * Delegates to {@link #expandTabs(CharSequence, int, int)} with an
     * initial offset of 0.
     *
     * @param string   The string to expand.
     * @param tabWidth The tab width.
     * @return The expanded string.
     */
    public static String expandTabs(CharSequence string, int tabWidth) {
        return expandTabs(string, tabWidth, 0);
    }

    /**
     * Replace every tab in the string with the number of spaces needed to
     * reach the next tab stop. The column counter starts at {@code offset},
     * is reset to 0 after each line feed, and otherwise advances by the
     * printable width of each char.
     *
     * @param string   The string to expand.
     * @param tabWidth The tab width, must be at least 1.
     * @param offset   The initial column offset, must not be negative.
     * @return The expanded string.
     * @throws NullPointerException     If {@code string} is null.
     * @throws IllegalArgumentException If {@code tabWidth} is below 1 or
     *                                  {@code offset} is negative.
     */
    public static String expandTabs(CharSequence string, int tabWidth, int offset) {
        requireNonNull(string, "string == null");
        if (tabWidth < 1) {
            throw new IllegalArgumentException("tabWidth " + tabWidth + " < 1");
        }
        if (offset < 0) {
            throw new IllegalArgumentException("offset " + offset + " < 0");
        }
        final AtomicInteger column = new AtomicInteger(offset);
        final StringBuilder expanded = new StringBuilder();
        CharStream.stream(string).forEachOrdered(ch -> {
            if (ch.codepoint() == '\t') {
                // Distance from the current column to the next tab stop.
                final int fill = tabWidth - (column.get() % tabWidth);
                expanded.append(" ".repeat(Math.max(0, fill)));
                column.addAndGet(fill);
                return;
            }
            // Everything but a tab is copied through unchanged.
            expanded.append(ch);
            if (ch.codepoint() == '\n') {
                // A new line starts counting columns from 0 again.
                column.set(0);
            } else {
                column.addAndGet(ch.printableWidth());
            }
        });
        return expanded.toString();
    }

    /**
     * Remove all non-printable characters from the string. Only
     * {@code Unicode} chars are considered for keeping, and of those only
     * the ones that are console printable, CR, LF or the non-breaking
     * space.
     *
     * @param string The source string.
     * @return The result without non-printable chars.
     * @throws NullPointerException If {@code string} is null.
     */
    public static String stripNonPrintable(CharSequence string) {
        requireNonNull(string, "string == null");
        final StringBuilder kept = new StringBuilder();
        CharStream.stream(string).forEachOrdered(ch -> {
            if (!(ch instanceof Unicode)) {
                // Anything that is not a plain unicode char is dropped.
                return;
            }
            final boolean keep = isConsolePrintable(ch.codepoint())
                                 || ch.codepoint() == Char.CR
                                 || ch.codepoint() == Char.LF
                                 || ch.equals(Unicode.NBSP);
            if (keep) {
                kept.append(ch);
            }
        });
        return kept.toString();
    }

    /**
     * Clip the string so that its printable content fills at most
     * {@code width} cells. Once the width is used up, all further
     * {@code Unicode} chars are dropped while every other char (control
     * and color) is left in place.
     *
     * @param string The base string.
     * @param width  The printed width.
     * @return The clipped string.
     * @throws NullPointerException If {@code string} is null.
     */
    public static String clipWidth(CharSequence string, int width) {
        requireNonNull(string, "string == null");
        final StringBuilder clipped = new StringBuilder();
        final AtomicInteger budget = new AtomicInteger(width);
        CharStream.stream(string).forEachOrdered(ch -> {
            final int charWidth = ch.printableWidth();
            final int left = budget.get();
            if (left == 0) {
                // Past the end: only non-unicode (control & color) is kept.
                if (!(ch instanceof Unicode)) {
                    clipped.append(ch);
                }
            } else if (charWidth > left) {
                // The char does not fit. Close the budget entirely so that
                // a narrower char following a dropped wide (CJK) char is
                // not kept in its place.
                budget.set(0);
            } else {
                clipped.append(ch);
                budget.addAndGet(-charWidth);
            }
        });
        return clipped.toString();
    }

    /**
     * Append spaces to the string until its <b>printed width</b> reaches
     * the desired width. A string that is already that wide or wider is
     * returned without padding.
     *
     * @param string The string to pad.
     * @param width  The total printable width to fill.
     * @return The padded string.
     */
    public static String rightPad(CharSequence string, int width) {
        final int used = printableWidth(string);
        if (used >= width) {
            return string.toString();
        }
        final String padding = " ".repeat(width - used);
        return string.toString() + padding;
    }

    /**
     * Prepend spaces to the string until its <b>printed width</b> reaches
     * the desired width. A string that is already that wide or wider is
     * returned without padding.
     *
     * @param string The string to pad.
     * @param width  The total printable width to fill.
     * @return The padded string.
     */
    public static String leftPad(CharSequence string, int width) {
        final int used = printableWidth(string);
        if (used >= width) {
            return string.toString();
        }
        final String padding = " ".repeat(width - used);
        return padding + string.toString();
    }

    /**
     * Surround the string with spaces until its <b>printed width</b>
     * reaches the desired width. When the missing width is odd, the extra
     * space goes on the right side. A string that is already that wide or
     * wider is returned without padding.
     *
     * @param string The string to center.
     * @param width  The total printable width to fill.
     * @return The padded string.
     */
    public static String center(CharSequence string, int width) {
        final int used = printableWidth(string);
        if (used >= width) {
            return string.toString();
        }
        final int missing = width - used;
        final int before = missing / 2;
        final int after = missing - before;
        return " ".repeat(before) + string.toString() + " ".repeat(after);
    }

    /**
     * This will attempt to print out the text line wrapped, using the printable
     * width to figure out where to cut each line. It is assumed the entire text
     * consists of a single paragraph of text.
     * <p>
     * Delegates to {@link #wrap(CharSequence, CharSequence, CharSequence, int)}
     * with an empty prefix and an empty indent.
     *
     * @param text  The string to be wrapped. The delegate splits it into words
     *              on runs of whitespace, so all spacing is normalized.
     * @param width The total width of the text.
     * @return The wrapped text.
     */
    public static String wrap(CharSequence text, int width) {
        return wrap("", "", text, width);
    }

    /**
     * Line-wrap a single paragraph of text, using the printable width of
     * each word to decide where lines are cut.
     * <p>
     * The text is split into words on runs of whitespace, so all spacing
     * is normalized to single spaces and line feeds. The first line starts
     * with the prefix, padded with spaces up to the indent width if the
     * prefix is narrower than the indent. Every following line starts
     * with the indent. A word placed first on a line is never moved to a
     * new line, even if it is wider than the available width.
     *
     * @param prefix String to come before the text on the first line only.
     * @param indent String to prefix the text any subsequent lines.
     * @param text   The string to be wrapped.
     * @param width  The total width of the text, including prefix / indent.
     * @return The wrapped text.
     * @throws NullPointerException If prefix, indent or text is null.
     */
    public static String wrap(CharSequence prefix,
                              CharSequence indent,
                              CharSequence text,
                              int width) {
        requireNonNull(prefix, "prefix == null");
        requireNonNull(indent, "indent == null");
        requireNonNull(text, "text == null");

        final StringBuilder out = new StringBuilder(prefix);
        int lineWidth = printableWidth(prefix);
        final int indentWidth = printableWidth(indent);
        if (lineWidth < indentWidth) {
            // Align the first line with the indent of the following lines.
            out.append(" ".repeat(indentWidth - lineWidth));
            lineWidth = indentWidth;
        }

        final String[] words = text.toString().split("[\\s]+");
        for (String word : words) {
            if (word.isEmpty()) {
                continue;
            }
            final int wordWidth = printableWidth(word);
            if (lineWidth == indentWidth) {
                // Nothing but indent on the line so far: the word always
                // goes here, without a separating space.
                out.append(word);
                lineWidth += wordWidth;
                continue;
            }
            if (lineWidth + wordWidth >= width) {
                // The word plus its separating space would not fit.
                out.append('\n')
                   .append(indent)
                   .append(word);
                lineWidth = indentWidth + wordWidth;
            } else {
                out.append(' ').append(word);
                lineWidth += 1 + wordWidth;
            }
        }

        return out.toString();
    }

    /**
     * Find the longest common path prefix of a set of '/' separated paths.
     * The result always ends in '/', or is the empty string when the
     * common prefix of the paths contains no '/'.
     *
     * @param paths Paths to find the longest common prefix for.
     * @return The longest common prefix.
     * @throws NullPointerException     If {@code paths} is null.
     * @throws IllegalArgumentException If {@code paths} is empty.
     */
    public static String longestCommonPrefixPath(Collection<String> paths) {
        requireNonNull(paths, "paths == null");
        if (paths.isEmpty()) {
            throw new IllegalArgumentException("Empty paths");
        }
        // Start from the first path and shrink it against every path.
        String common = paths.iterator().next();
        for (String path : paths) {
            final int shared = commonPrefix(path, common);
            if (shared < common.length()) {
                common = common.substring(0, shared);
            }
        }
        // Cut back to just after the last '/', so the prefix never ends in
        // the middle of a path element.
        final int lastSlash = common.lastIndexOf('/');
        return lastSlash < 0 ? "" : common.substring(0, lastSlash + 1);
    }

    /**
     * Assuming all strings in the path input are '/' separated paths, find
     * the longest common path prefix (meaning the prefix ends in '/') and
     * remove that prefix from all the input paths.
     *
     * @param paths Paths to strip the longest common prefix from.
     * @return The paths with the common prefix removed, in the iteration
     *         order of the input. If there is no common path prefix this
     *         is a copy of the input paths.
     */
    public static List<String> stripCommonPrefixPath(Collection<String> paths) {
        final String prefix = longestCommonPrefixPath(paths);
        if (prefix.isEmpty()) {
            return List.copyOf(paths);
        }
        final int skip = prefix.length();
        return paths.stream()
                    .map(path -> path.substring(skip))
                    .collect(Collectors.toList());
    }

    /**
     * Determine the common prefix of two strings. Delegates to
     * {@code DiffStringUtil.commonPrefix}.
     *
     * @param text1 First string.
     * @param text2 Second string.
     * @return The number of characters common to the start of each string.
     */
    public static int commonPrefix(String text1, String text2) {
        return DiffStringUtil.commonPrefix(text1, text2);
    }

    /**
     * Determine the common suffix of two strings. Delegates to
     * {@code DiffStringUtil.commonSuffix}.
     *
     * @param text1 First string.
     * @param text2 Second string.
     * @return The number of characters common to the end of each string.
     */
    public static int commonSuffix(String text1, String text2) {
        return DiffStringUtil.commonSuffix(text1, text2);
    }

    /**
     * Determine if the suffix of one string is the prefix of another.
     * Delegates to {@code DiffStringUtil.commonOverlap}.
     *
     * @param text1 First string.
     * @param text2 Second string.
     * @return The number of characters common to the end of the first
     * string and the start of the second string.
     */
    public static int commonOverlap(String text1, String text2) {
        return DiffStringUtil.commonOverlap(text1, text2);
    }

    // Private constructor to defeat instantiation: every member of this
    // class is static, so there is nothing an instance could be used for.
    private StringUtil() {}
}