ConsoleUtil.java
/*
* Copyright (c) 2020, Stein Eldar Johnsen
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package net.morimekta.strings;
import static java.lang.Character.isHighSurrogate;
import static java.lang.Character.isLowSurrogate;
import static java.lang.Character.toCodePoint;
import static java.util.Objects.requireNonNull;
/**
 * Static helpers for deciding whether characters can be shown on a console,
 * and for sanitizing strings that contain characters that cannot.
 */
public final class ConsoleUtil {
    /**
     * Replace non-printable chars in a string with something else. The
     * replacement is static and only meant as a place-holder. It is advised
     * to use a non-standard char as the replacement, as otherwise it will
     * not be distinguishable from the standard "printable".
     * <p>
     * Surrogate pairs are evaluated as a single code point: a printable pair
     * is kept as is, and a non-printable pair is replaced with a
     * <b>single</b> replacement char. An unpaired surrogate (high or low) is
     * replaced on its own, and never swallows the char that follows it.
     *
     * @param str         The string to escape.
     * @param replacement Char to replace non-printable with.
     * @return The escaped char string.
     * @throws NullPointerException If the string is null.
     */
    public static String replaceNonPrintable(CharSequence str, char replacement) {
        requireNonNull(str, "str == null");
        final int length = str.length();
        StringBuilder builder = new StringBuilder(length);
        for (int i = 0; i < length; ++i) {
            char c1 = str.charAt(i);
            if (isHighSurrogate(c1)) {
                if (i + 1 < length && isLowSurrogate(str.charAt(i + 1))) {
                    // Well-formed pair: judge the combined code point.
                    char c2 = str.charAt(++i);
                    if (isConsolePrintable(toCodePoint(c1, c2))) {
                        builder.append(c1).append(c2);
                    } else {
                        builder.append(replacement);
                    }
                } else {
                    // Unpaired high surrogate. The next char is deliberately
                    // NOT consumed here: it may itself start a valid
                    // surrogate pair, which must be handled as a unit on
                    // the next iteration.
                    builder.append(replacement);
                }
            } else if (isConsolePrintable(c1)) {
                builder.append(c1);
            } else {
                // Includes unpaired low surrogates, which are never
                // reported as printable.
                builder.append(replacement);
            }
        }
        return builder.toString();
    }

    /**
     * Utility to figure out if a character is printable to the console as
     * a character. Returns false if one of:
     * <ul>
     *   <li>The character is a control character.
     *   <li>The character is not defined.
     *   <li>The character does not have a known representation.
     * </ul>
     * Note that newline and tab are ISO control characters, and are
     * therefore reported as <b>not</b> printable.
     *
     * @param cp The character unicode code point.
     * @return If it is printable.
     */
    public static boolean isConsolePrintable(int cp) {
        if (cp >= 0x20 && cp < 0x7F) {
            // main printable ascii
            return true;
        }
        if (!Character.isDefined(cp) || Character.isISOControl(cp)) {
            return false;
        }
        return !isKnownUnprintable(cp);
    }

    /**
     * Table of code points that pass the {@link Character#isDefined(int)} and
     * {@link Character#isISOControl(int)} checks, but that are still treated
     * as not printable.
     * <p>
     * The C0 / C1 control ranges (below 0x20, and 0x7F through 0x9F) are not
     * listed here, as the caller has already rejected them with the ISO
     * control check.
     *
     * @param cp The unicode code point to look up.
     * @return True if the code point is listed as not printable.
     */
    private static boolean isKnownUnprintable(int cp) {
        return (0x058b <= cp && cp <= 0x058c) ||
               cp == 0x0590 ||
               (0x05c8 <= cp && cp <= 0x05cf) ||
               (0x05ec <= cp && cp <= 0x05ef) ||
               (0x05f8 <= cp && cp <= 0x05ff) ||
               cp == 0x061d ||
               (0x085c <= cp && cp <= 0x089f) ||
               cp == 0x08b5 ||
               (0x08bf <= cp && cp <= 0x08d2) ||
               cp == 0x0ac6 ||
               (0x0bfb <= cp && cp <= 0x0bff) ||
               cp == 0x0c04 ||
               cp == 0x0c0d ||
               cp == 0x0c11 ||
               cp == 0x0c29 ||
               (0x0c3a <= cp && cp <= 0x0c3c) ||
               cp == 0x0c45 ||
               cp == 0x0c49 ||
               (0x0c4e <= cp && cp <= 0x0c54) ||
               (0x0c5b <= cp && cp <= 0x0c5f) ||
               (0x0c64 <= cp && cp <= 0x0c65) ||
               cp == 0x0c70 ||
               (0x0c72 <= cp && cp <= 0x0c77) ||
               (0x0c80 <= cp && cp <= 0x0c81) ||
               cp == 0x0c84 ||
               cp == 0x0c8d ||
               cp == 0x0c91 ||
               cp == 0x0ca9 ||
               cp == 0x0cb4 ||
               (0x0cba <= cp && cp <= 0x0cbb) ||
               cp == 0x0cc5 ||
               cp == 0x0cc9 ||
               (0x0cce <= cp && cp <= 0x0cd4) ||
               (0x0cd7 <= cp && cp <= 0x0cdd) ||
               cp == 0x0cdf ||
               (0x0ce4 <= cp && cp <= 0x0ce5) ||
               cp == 0x0cf0 ||
               (0x0cf3 <= cp && cp <= 0x0cff) ||
               cp == 0x0d0d ||
               cp == 0x0d11 ||
               cp == 0x0ece ||
               cp == 0x0f8c ||
               // ----
               cp == 0x10cd ||
               cp == 0x10fd || cp == 0x10fe || cp == 0x10ff ||
               cp == 0x1715 || cp == 0x171f ||
               (0x1737 <= cp && cp <= 0x173f) ||
               cp == 0x180f ||
               (0x181a <= cp && cp <= 0x181f) ||
               (0x1ac1 <= cp && cp <= 0x1aff) ||
               cp == 0x1b7d || cp == 0x1b7e ||
               // ----
               (0x202a <= cp && cp <= 0x202e) ||
               (0x2066 <= cp && cp <= 0x2068) ||
               cp == 0x2072 || cp == 0x2073 ||
               cp == 0x2c2b || cp == 0x2c2c || cp == 0x2c2d ||
               (0x2cf4 <= cp && cp <= 0x2cf8) ||
               (0x2e53 <= cp && cp <= 0x2e5d) ||
               // ----
               cp == 0x3098 ||
               (0x3100 <= cp && cp <= 0x3104) ||
               cp == 0x3130 ||
               (0x31bc <= cp && cp <= 0x31bf) ||
               // ----
               cp == 0xa7c0 || cp == 0xa7c1 ||
               (0xa7d0 <= cp && cp <= 0xa7f4) ||
               // ----
               (0xd7fc <= cp && cp <= 0xd7ff) ||
               // ----
               // NOTE: Unicode characters in the range 0xd800-0xdfff
               // are reserved for handling UTF-16 extended characters, using
               // one high-surrogate + one low surrogate pair of 10 bits each,
               // so effectively upping the codepoint value limit from 16 to
               // 20 bits (0x100000).
               (0xd800 <= cp && cp <= 0xdfff) ||
               // ----
               (0xe000 <= cp && cp <= 0xe0fe) ||
               (0xe100 <= cp && cp <= 0xebff) ||
               cp == 0xec07 || cp == 0xec08 ||
               (0xec0c <= cp && cp <= 0xeeff) ||
               (0xef1a <= cp && cp <= 0xefbe) ||
               (0xefed <= cp && cp <= 0xeff9) ||
               cp == 0xf01f || cp == 0xf020 ||
               cp == 0xf0af ||
               (0xf0b3 <= cp && cp <= 0xf0bf) ||
               cp == 0xf0cf || cp == 0xf0df || cp == 0xf0ef || cp == 0xf0ff ||
               cp == 0xf10f || cp == 0xf11f || cp == 0xf12f || cp == 0xf13f ||
               cp == 0xf14f || cp == 0xf15f || cp == 0xf16f || cp == 0xf17f ||
               cp == 0xf18f || cp == 0xf19f || cp == 0xf1af || cp == 0xf1bf ||
               cp == 0xf1cf || cp == 0xf1df || cp == 0xf1ef || cp == 0xf1ff ||
               cp == 0xf20f || cp == 0xf21f || cp == 0xf220 ||
               cp == 0xf24f || cp == 0xf25f || cp == 0xf26f || cp == 0xf27f ||
               cp == 0xf28f || cp == 0xf29f || cp == 0xf2af || cp == 0xf2bf ||
               cp == 0xf2cf || cp == 0xf2df ||
               (0xf2ef <= cp && cp <= 0xf3ff) ||
               (0xf442 <= cp && cp <= 0xf4ff) ||
               (0xf501 <= cp && cp <= 0xf505) ||
               (0xf512 <= cp && cp <= 0xf809 && cp != 0xf5c5 &&
                cp != 0xf6c4 && cp != 0xf6c5 && cp != 0xf6c6 && cp != 0xf6c7 && cp != 0xf6c8 &&
                cp != 0xf6d1 && cp != 0xf6d4) ||
               (0xf81e <= cp && cp <= 0xf8fe) ||
               (0xfa70 <= cp && cp <= 0xfaff) ||
               cp == 0xfbc2 ||
               (0xfd40 <= cp && cp <= 0xfd4f) ||
               cp == 0xfdcf || cp == 0xfdfe || cp == 0xfdff;
    }

    /** Not instantiable: static utility holder only. */
    private ConsoleUtil() {}
}