// NamingUtil.java

/*
 * Copyright (c) 2020, Stein Eldar Johnsen
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package net.morimekta.strings;

import net.morimekta.strings.chr.Char;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.stream.Collectors;

import static java.lang.Character.isLetter;
import static java.lang.Character.isLetterOrDigit;
import static java.lang.Character.isLowerCase;
import static java.lang.Character.isUpperCase;
import static java.util.Objects.requireNonNull;

/**
 * Utility to handle multiple word identifiers and means of converting
 * between them.
 * <p>
 * A name is first split into its word components, see
 * {@link #splitNameSource(CharSequence)}, and the components are then
 * re-assembled according to one of the {@link Format} naming schemes,
 * see {@link #format(List, Format)}.
 */
public final class NamingUtil {
    /**
     * Formats to be used with name formatting. Any string following any of
     * these naming schemes should be possible to consistently convert to any
     * other format.
     * <p>
     * See Wikipedia on
     * <a href="https://en.wikipedia.org/wiki/Naming_convention_(programming)#Multiple-word_identifiers">
     *     Multiple Word Identifiers
     * </a> for details. Note that not all possible such formats are handled by
     * the {@link NamingUtil#format(List, Format)} method, only the ones specified
     * in this enum.
     */
    public enum Format {
        /**
         * A namespace format concatenates lower-cased names with '.'.
         * <p>
         * E.g.: {@code namespace.for.my.string}
         */
        NAMESPACE,
        /**
         * Lisp case format concatenates lower-cased names with '-'.
         * <p>
         * E.g.: {@code lisp-case-for-my-string}
         */
        LISP,
        /**
         * Snake case format concatenates lower-cased names with '_'.
         * <p>
         * E.g.: {@code snake_case}
         */
        SNAKE,
        /**
         * Upper case snake case, also called 'screaming snake' case,
         * concatenates upper-cased names with '_'.
         * <p>
         * E.g.: {@code SCREAMING_SNAKE_CASE}
         */
        SNAKE_UPPER,
        /**
         * Pascal case, also known as Upper Camel Case, is the standard java
         * naming scheme for classes. It capitalizes each name and concatenates
         * them directly.
         * <p>
         * E.g.: {@code PascalCase}
         */
        PASCAL,
        /**
         * The Dromedary, or Arabic camel, famous for being the one-humped camel,
         * means a camel case where the first word is not capitalized. Also called
         * 'lower Camel Case' and in certain circumstances the 'hungarian notation'.
         * See <a href="https://en.wikipedia.org/wiki/Camel_case">wikipedia</a> for
         * details.
         * <p>
         * E.g.: {@code dromedaryCase}
         */
        CAMEL,
    }

    /**
     * Format a string to the desired naming scheme. The source is split into
     * name components with {@link #splitNameSource(CharSequence)} and then
     * formatted with {@link #format(List, Format)}.
     *
     * @param source Name to split and re-format.
     * @param format The desired name format.
     * @return The formatted string.
     * @throws NullPointerException If source or format is null.
     */
    public static String format(CharSequence source, Format format) {
        requireNonNull(source, "source == null");
        return format(splitNameSource(source), format);
    }

    /**
     * Format a list of names to the desired naming scheme.
     *
     * @param names List of names to format and concatenate.
     * @param format The desired name format.
     * @return The formatted string. An empty list of names results in an
     *         empty string.
     * @throws NullPointerException If names or format is null.
     */
    public static String format(List<String> names, Format format) {
        requireNonNull(names, "names == null");
        requireNonNull(format, "format == null");
        switch (format) {
            case NAMESPACE:
                return joinLowerCased(names, ".");
            case LISP:
                return joinLowerCased(names, "-");
            case SNAKE:
                return joinLowerCased(names, "_");
            case SNAKE_UPPER:
                return names.stream()
                            .map(StringUtil::toUpperCase)
                            .collect(Collectors.joining("_"));
            case PASCAL:
                return joinCapitalized(names, 0);
            case CAMEL:
                // There is no first word to lower-case in an empty list.
                if (names.isEmpty()) return "";
                // The first word is lower-cased in full, the remaining words
                // are capitalized as in pascal case.
                // NOTE(review): this uses String.toLowerCase(Locale.US) directly
                // while every other format goes through StringUtil.toLowerCase;
                // confirm the two are meant to be equivalent.
                return names.get(0).toLowerCase(Locale.US) + joinCapitalized(names, 1);
            default:
                // untestable as it has exhausted enum values.
                throw new IllegalArgumentException("Unsupported format " + format);
        }
    }

    /**
     * Split the input name into name components using a wide variety of
     * naming conventions. The split follows the rule that a name boundary
     * is found when:
     *
     * <ul>
     *     <li>
     *         An uppercase letter is seen, and the closest preceding letter
     *         is lower-case. The split is then just before the uppercase
     *         letter. Digits in between the two letters do not prevent the
     *         split, e.g. {@code abc1Def} becomes {@code abc1} and {@code Def}.
     *         A run of upper-case letters is never split, so
     *         {@code HTTPServer} is kept as a single name.
     *     </li>
     *     <li>
     *         Any non-letter, non-digit character is interpreted as a boundary
     *         in and of itself, itself not being part of a name.
     *     </li>
     *     <li>
     *         Empty names are skipped.
     *     </li>
     * </ul>
     *
     * The sequence is read one unicode code point at a time, so letters and
     * digits outside the basic multilingual plane (encoded as surrogate pairs)
     * are kept as part of the name they belong to. The name components keep
     * the character casing they had in the input.
     *
     * @param sequence The char sequence to interpret.
     * @return List of string name components.
     * @throws NullPointerException If the sequence is null.
     */
    public static List<String> splitNameSource(CharSequence sequence) {
        requireNonNull(sequence, "sequence == null");
        List<String>  names   = new ArrayList<>();
        StringBuilder builder = new StringBuilder();

        // Code point of the most recently seen letter. Digits and separators
        // do not update it, so the case comparison is always letter to letter.
        int lastLetter = Char.NUL;
        for (int i = 0; i < sequence.length(); ) {
            // Read a full code point, so a surrogate pair is handled as the
            // single character it encodes, not as two non-letter chars.
            int cp = Character.codePointAt(sequence, i);
            i += Character.charCount(cp);

            if (isLetterOrDigit(cp)) {
                if (isLetter(cp)) {
                    if (builder.length() > 0 && isUpperCase(cp) && isLowerCase(lastLetter)) {
                        // new name in CamelCase and DromedaryCase
                        names.add(builder.toString());
                        builder.setLength(0);
                    }
                    lastLetter = cp;
                }
                // digits are part of the preceding name.
                builder.appendCodePoint(cp);
            } else if (builder.length() > 0) {
                // new name in snake case, namespace or other.
                names.add(builder.toString());
                builder.setLength(0);
            }
        }
        if (builder.length() > 0) {
            names.add(builder.toString());
        }
        return names;
    }

    /**
     * Pass each name through {@code StringUtil.toLowerCase} and join the
     * results with the separator.
     */
    private static String joinLowerCased(List<String> names, String separator) {
        return names.stream()
                    .map(StringUtil::toLowerCase)
                    .collect(Collectors.joining(separator));
    }

    /**
     * Pass each name after the first {@code skip} names through
     * {@code StringUtil.toLowerCase} and then {@code StringUtil.capitalize},
     * and concatenate the results without any separator.
     */
    private static String joinCapitalized(List<String> names, int skip) {
        return names.stream()
                    .skip(skip)
                    .map(StringUtil::toLowerCase)
                    .map(StringUtil::capitalize)
                    .collect(Collectors.joining());
    }

    private NamingUtil() {}
}