// GSMCharsetUtil.java
/*
* Copyright (c) 2020, Stein Eldar Johnsen
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package net.morimekta.strings.internal;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
public final class GSMCharsetUtil {
public static final String DEFAULT_BC;
public static final String DEFAULT_EX;
public static final String TURKISH_LS;
public static final String TURKISH_SS;
public static final String SPANISH_SS;
public static final String PORTUGUESE_LS;
public static final String PORTUGUESE_SS;
public static final String BENGALI_LS;
public static final String BENGALI_SS;
public static final Map<Byte, String> BENGALI_SX;
public static final String GUJARATI_LS;
public static final String GUJARATI_SS;
public static final String HINDI_LS;
public static final String HINDI_SS;
public static final Map<Byte, String> HINDI_SX;
public static final String KANNADA_LS;
public static final String KANNADA_SS;
public static final String MALAYALAM_LS;
public static final String MALAYALAM_SS;
public static final String ORIYA_LS;
public static final String ORIYA_SS;
public static final Map<Byte, String> ORIYA_SX;
public static final String PUNJABI_LS;
public static final String PUNJABI_SS;
public static final Map<Byte, String> PUNJABI_LX;
public static final Map<Byte, String> PUNJABI_SX;
public static final String TAMIL_LS;
public static final String TAMIL_SS;
public static final String TELUGU_LS;
public static final String TELUGU_SS;
public static final String URDU_LS;
public static final String URDU_SS;
public static final byte SHIFT_SEPTET = 0x1b;
public static final char EXT_CODE = '\2';
static {
// The basic set only uses 7 bits (a septet) for each
// character. If the 'esc' char is encountered, the next
// char (only) is taken from the basic extension table.
DEFAULT_BC = join(
"@£$¥èéùìòÇ\nØø\rÅå",
"Δ_ΦΓΛΩΠΨΣΘΞ\1ÆæßÉ",
" !\"#¤%&'()*+,-./",
"0123456789:;<=>?",
"¡ABCDEFGHIJKLMNO",
"PQRSTUVWXYZÄÖÑܧ",
"¿abcdefghijklmno",
"pqrstuvwxyzäöñüà");
DEFAULT_EX = join(
"\0\0\0\0\0\0\0\0\0\0\f\0\0\0\0\0",
"\0\0\0\0^\0\0\0\0\0\0\1\0\0\0\0",
"\0\0\0\0\0\0\0\0{}\0\0\0\0\0\\",
"\0\0\0\0\0\0\0\0\0\0\0\0[~]\0",
"|\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0",
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0",
"\0\0\0\0\0€\0\0\0\0\0\0\0\0\0\0",
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0");
TURKISH_LS = join(
"@£$¥€éùıòÇ\nĞğ\rÅå",
"Δ_ΦΓΛΩΠΨΣΘΞ\1ŞşßÉ",
" !\"#¤%&'()*+,-./",
"0123456789:;<=>?",
"İABCDEFGHIJKLMNO",
"PQRSTUVWXYZÄÖÑܧ",
"¿abcdefghijklmno",
"pqrstuvwxyzäöñüà");
TURKISH_SS = join(
"\0\0\0\0\0\0\0\0\0\0\f\0\0\b\0\0",
"\0\0\0\0^\0\0\0\0\0\0\1\0\0\0\0",
"\0\0\0\0\0\0\0\0{}\0\0\0\0\0\\",
"\0\0\0\0\0\0\0\0\0\0\0\0[~]\0",
"|\0\0\0\0\0\0Ğ\0İ\0\0\0\0\0\0",
"\0\0\0Ş\0\0\0\0\0\0\0\0\0\0\0\0",
"\0\0\0ç\0€\0ğ\0ı\0\0\0\0\0\0",
"\0\0\0ş\0\0\0\0\0\0\0\0\0\0\0\0");
SPANISH_SS = join(
"\0\0\0\0\0\0\0\0\0ç\f\0\0\0\0\0",
"\0\0\0\0^\0\0\0\0\0\0\1\0\0\0\0",
"\0\0\0\0\0\0\0\0{}\0\0\0\0\0\\",
"\0\0\0\0\0\0\0\0\0\0\0\0[~]\0",
"|Á\0\0\0\0\0\0\0Í\0\0\0\0\0Ó",
"\0\0\0\0\0Ú\0\0\0\0\0\0\0\0\0\0",
"\0\0\0\0\0€\0\0\0í\0\0\0\0\0ó",
"\0\0\0\0\0ú\0\0\0\0\0\0\0\0\0\0");
PORTUGUESE_LS = join(
"@£$¥êéúíóÇ\nÔô\rÁá",
"Δ_ªÇÀ∞^\\€Ó|\1ÂâÊê",
" !\"#º%&'()*+,-./",
"0123456789:;<=>?",
"ÍABCDEFGHIJKLMNO",
"PQRSTUVWXYZÃÕÚܧ",
"~abcdefghijklmno",
"pqrstuvwxyzãõ`üà");
PORTUGUESE_SS = join(
"\0\0\0\0\0ê\0\0\0ç\fÔô\bÁá",
"\0\0ΦΓ^ΩΠΨΣΘ\0\1\0\0\0Ê",
"\0\0\0\0\0\0\0\0{}\0\0\0\0\0\\",
"\0\0\0\0\0\0\0\0\0\0\0\0[~]\0",
"|À\0\0\0\0\0\0\0Í\0\0\0\0\0Ó",
"\0\0\0\0\0Ú\0\0\0\0\0ÃÕ\0\0\0",
"\0Â\0\0\0€\0\0\0í\0\0\0\0\0ó",
"\0\0\0\0\0ú\0\0\0\0\0ãõ\0\0â");
BENGALI_LS = join(
"ঁংঃঅআইঈউঊঋ\nঌ\0\r\0এ",
"ঐ\0\0ওঔকখগঘঙচ\1ছজঝঞ",
" !টঠডঢণত)(থদ,ধ.ন",
"0123456789:;\0পফ?",
"বভমযর\0ল\0\0\0শষসহ়ঽ",
"ািীুূৃৄ\0\0েৈ\0\0োৌ্",
"ৎabcdefghijklmno",
"pqrstuvwxyzৗডঢৰৱ");
BENGALI_SS = join(
"@£$¥¿\"¤%&'\f*+\b-/",
"<=>¡^¡_#*০১\1২৩৪৫",
"৬৭৮৯\2ৠৡৢ{}ৣ৲৳৴৵\\",
"৶৷৸৹৺\0\0\0\0\0\0\0[~]\0",
"|ABCDEFGHIJKLMNO",
"PQRSTUVWXYZ\0\0\0\0\0",
"\0\0\0\0\0€\0\0\0\0\0\0\0\0\0\0",
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0");
BENGALI_SX = Map.of((byte) 0x24, "য়");
GUJARATI_LS = join(
"ઁંઃઅઆઇઈઉઊઋ\nઌઍ\r\0એ",
"ઐઑ\0ઓઔકખગઘઙચ\1છજઝઞ",
" !ટઠડઢણત)(થદ,ધ.ન",
"0123456789:;\0પફ?",
"બભમયર\0લળ\0વશષસહ઼ઽ",
"ાિીુૂૃૄૅ\0ેૈૉ\0ોૌ્",
"ૐabcdefghijklmno",
"pqrstuvwxyzૠૡૢૣ૱");
GUJARATI_SS = join(
"@£$¥¿\"¤%&'\f*+\b-/",
"<=>¡^¡_#*।॥\1૦૧૨૩",
"૪૫૬૭૮૯\0\0{}\0\0\0\0\0\\",
"\0\0\0\0\0\0\0\0\0\0\0\0[~]\0",
"|ABCDEFGHIJKLMNO",
"PQRSTUVWXYZ\0\0\0\0\0",
"\0\0\0\0\0€\0\0\0\0\0\0\0\0\0\0",
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0");
HINDI_LS = join(
"ँंःअआइईउऊऋ\nऌऍ\rऎए",
"ऐऑऒओऔकखगघङच\1छजझञ",
" !टठडढणत)(थद,ध.न",
"0123456789:;ऩपफ?",
"बभमयरऱलळऴवशषसह़ऽ",
"ािीुूृॄॅॆेैॉॊोौ्",
"ॐabcdefghijklmno",
"pqrstuvwxyzॲॻॼॾॿ");
HINDI_SS = join(
"@£$¥¿\"¤%&'\f*+\b-/",
"<=>¡^¡_#*।॥\1०१२३",
"४५६७८९॒॑{}॓॔\2\2\2\\",
"\2\2\2\2\2ॠॡॢॣ॰ॱ\0[~]\0",
"|ABCDEFGHIJKLMNO",
"PQRSTUVWXYZ\0\0\0\0\0",
"\0\0\0\0\0€\0\0\0\0\0\0\0\0\0\0",
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0");
HINDI_SX = Map.of(
(byte) 0x2c, "क़",
(byte) 0x2d, "ख़",
(byte) 0x2e, "ग़",
(byte) 0x30, "ज़",
(byte) 0x31, "ड़",
(byte) 0x32, "ढ़",
(byte) 0x33, "फ़",
(byte) 0x34, "य़"
);
KANNADA_LS = join(
"\0ಂಃಅಆಇಈಉಊಋ\nಌ\0\rಎಏ",
"ಐ\0ಒಓಔಕಖಗಘಙಚ\1ಛಜಝಞ",
" !ಟಠಪಢಣತ)(ಥದ,ಧ.ನ",
"0123456789:;\0ಪಫ?",
"ಬಭಮಯರಱಲಳ\0ವಶಷಸಹ಼ಽ",
"ಾಿೀುೂೃೄ\0ೆೇೈ\0ೊೋೌ್",
"ೕabcdefghijklmno",
"pqrstuvwxyzೖೠೡೢೣ");
KANNADA_SS = join(
"@£$¥¿\"¤%&'\f*+\b-/",
"<=>¡^¡_#*।॥\1೦೧೨೩",
"೪೫೬೭೮೯ೞೱ{}ೲ\0\0\0\0\\",
"\0\0\0\0\0\0\0\0\0\0\0\0[~]\0",
"|ABCDEFGHIJKLMNO",
"PQRSTUVWXYZ\0\0\0\0\0",
"\0\0\0\0\0€\0\0\0\0\0\0\0\0\0\0",
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0");
MALAYALAM_LS = join(
"\0ംഃഅആഇഈഉഊഋ\nഌ\0\rഎഏ",
"ഐ\0ഒഓഔകഖഗഘങച\1ഛജഝഞ",
" !ടഠഡഢണത)(ഥദ,ധ.ന",
"0123456789:;\0പഫ?",
"ബഭമയരറലളഴവശഷസഹ\0ഽ",
"ാിീുൂൃൄ\0െേൈ\0ൊോൌ്",
"ൗabcdefghijklmno",
"pqrstuvwxyzൠൡൢൣ൹");
MALAYALAM_SS = join(
"@£$¥¿\"¤%&'\f*+\b-/",
"<=>¡^¡_#*।॥\1൦൧൨൩",
"൪൫൬൭൮൯൰൱{}൲൳൴൵ൺ\\",
"ൻർൽൾൿ\0\0\0\0\0\0\0[~]\0",
"-ABCDEFGHIJKLMNO",
"PQRSTUVWXYZ\0\0\0\0\0",
"\0\0\0\0\0€\0\0\0\0\0\0\0\0\0\0",
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0");
ORIYA_LS = join(
"ଁଂଃଅଆଇଈଉଊଋ\nଌ\0\r\0ଏ",
"ଐ\0\0ଓଔକଖଗଘଙଚ\1ଛଜଝଞ",
" !ଟଠଡଢଣତ)(ଥଦ,ଧ.ନ",
"0123456789:;\0ପଫ?",
"ବଭମଯର\0ଲଳ\0ଵଶଷସହ଼ଽ",
"ାିୀୁୂୃୄ\0\0େୈ\0\0ୋୌ୍",
"ୖabcdefghijklmno",
"pqrstuvwxyzୗୠୡୢୣ");
ORIYA_SS = join(
"@£$¥¿\"¤%&'\f*+\b-/",
"<=>¡^¡_#*।॥\1୦୧୨୩",
"୪୫୬୭୮୯\2\2{}ୟ୰ୱ\0\0\\",
"\0\0\0\0\0\0\0\0\0\0\0\0[~]\0",
"|ABCDEFGHIJKLMNO",
"PQRSTUVWXYZ\0\0\0\0\0",
"\0\0\0\0\0€\0\0\0\0\0\0\0\0\0\0",
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0");
ORIYA_SX = Map.of(
(byte) 0x26, "ଡ଼",
(byte) 0x27, "ଢ଼"
);
PUNJABI_LS = join(
"ਁਂਃਅਆਇਈਉਊ\0\n\0\0\r\0ਏ",
"ਐ\0\0ਓਔਕਖਗਘਙਚ\1ਛਜਝਞ",
" !ਟਠਡਢਣਤ)(ਥਦ,ਧ.ਨ",
"0123456789:;\0ਪਫ?",
"ਬਭਮਯਰ\0ਲ\2\0ਵ\2\0ਸਹ਼\0",
"ਾਿੀੁੂ\0\0\0\0ੇੈ\0\0ੋੌ੍",
"ੑabcdefghijklmno",
"pqrstuvwxyzੰੱੲੳੴ");
PUNJABI_SS = join(
"@£$¥¿\"¤%&'\f*+\b-/",
"<=>¡^¡_#*।॥\1੦੧੨੩",
"੪੫੬੭੮੯\2\2{}\2ੜ\2ੵ\0\\",
"\0\0\0\0\0\0\0\0\0\0\0\0[~]\0",
"|ABCDEFGHIJKLMNO",
"PQRSTUVWXYZ\0\0\0\0\0",
"\0\0\0\0\0€\0\0\0\0\0\0\0\0\0\0",
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0");
PUNJABI_LX = Map.of(
(byte) 0x47, "ਲ਼",
(byte) 0x4a, "ਸ਼"
);
PUNJABI_SX = Map.of(
(byte) 0x26, "ਖ਼",
(byte) 0x27, "ਗ਼",
(byte) 0x2a, "ਜ਼",
(byte) 0x2c, "ਫ਼"
);
TAMIL_LS = join(
"\0ஂஃஅஆஇஈஉஊ\0\n\0\0\rஎஏ",
"ஐ\0ஒஓஔக\0\0\0ஙச\1\0ஜ\0ஞ",
" !ட\0\0\0ணத)(\0\0,\0.ந",
"0123456789:;னப\0?",
"\0\0மயரறலளழவஶஷஸஹ\0\0",
"ாிீுூ\0\0\0ெேை\0ொோௌ்",
"ੑabcdefghijklmno",
"pqrstuvwxyzௗ௰௱௲௹");
TAMIL_SS = join(
"@£$¥¿\"¤%&'\f*+\b-/",
"<=>¡^¡_#*।॥\1௦௧௨௩",
"௪௫௬௭௮௯௳௴{}௵௶௷௸௺\\",
"\0\0\0\0\0\0\0\0\0\0\0\0[~]\0",
"|ABCDEFGHIJKLMNO",
"PQRSTUVWXYZ\0\0\0\0\0",
"\0\0\0\0\0€\0\0\0\0\0\0\0\0\0\0",
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0");
TELUGU_LS = join(
"ఁంఃఅఆఇఈఉఊఋ\nఌ\0\rఎఏ",
"ఐ\0ఒఓఔకఖగఘఙచ\1ఛజఝఞ",
" !టఠడఢణత)(థద,ధ.న",
"0123456789:;\0పఫ?",
"బభమయరఱలళ\0వశషసహ\0ఽ",
"ాిీుూృౄ\0ెేై\0ొోౌ్",
"ౕabcdefghijklmno",
"pqrstuvwxyzౖౠౡౢౣ");
TELUGU_SS = join(
"@£$¥¿\"¤%&'\f*+\b-/",
"<=>¡^¡_#*\0\0\1౦౧౨౩",
"౪౫౬౭౮౯ౘౙ{}౸౹౺౻౼\\",
"౽౾౿\0\0\0\0\0\0\0\0\0[~]\0",
"|ABCDEFGHIJKLMNO",
"PQRSTUVWXYZ\0\0\0\0\0",
"\0\0\0\0\0€\0\0\0\0\0\0\0\0\0\0",
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0");
URDU_LS = join(
"اآبٻڀپڦتۂٿ\nٹٽ\rٺټ",
"ثجځڄڃڅچڇحخد\1ڌڈډڊ",
" !ڏڍذرڑړ)(ڙز,ږ.ژ",
"0123456789:;ښسش?",
"صضطظعفقکڪګگڳڱلمن",
"ںڻڼوۄەہھءیېےٍُِٗ",
"ٔabcdefghijklmno",
"pqrstuvwxyzّٰٕٖٓ");
URDU_SS = join(
"@£$¥¿\"¤%&'\f*+\b-/",
"<=>¡^¡_#*\1۰۱۲۳",
"۴۵۶۷۸۹،؍{}؎؏ؐؑؒ\\",
"ؓؔ؛؟ـْ٘٫٬ٲٳۍ[~]۔",
"|ABCDEFGHIJKLMNO",
"PQRSTUVWXYZ\0\0\0\0\0",
"\0\0\0\0\0€\0\0\0\0\0\0\0\0\0\0",
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0");
}
public static String join(String... parts) {
StringBuilder b = new StringBuilder();
// if (parts.length != 8) {
// throw new IllegalArgumentException("l=" + parts.length);
// }
for (int i = 0; i < parts.length; ++i) {
// if (parts[i].length() != 16) {
// throw new IllegalArgumentException(format("[%d].l = %d", i, parts[i].length()));
// }
b.append(parts[i]);
}
return b.toString();
}
public static Map<Character, Byte> reverse(String table) {
if (table == null) return null;
if (table.length() != 128) throw new IllegalArgumentException("Table length == " + table.length());
Map<Character, Byte> out = new HashMap<>();
for (int i = 0; i < 128; ++i) {
char c = table.charAt(i);
if (c > EXT_CODE) {
out.put(c, (byte) i);
}
// else if (c == EXT_CODE) {
// if (!tableLong.containsKey((byte) i)) {
// throw new IllegalArgumentException(format("No long variant for %02x", i));
// }
// }
}
return Map.copyOf(out);
}
public static Set<Character> firstChars(Map<Byte, String> of) {
Set<Character> out = new HashSet<>();
for (String s : of.values()) {
out.add(s.charAt(0));
}
return Set.copyOf(out);
}
public static Map<String, Byte> flip(Map<Byte, String> map) {
LinkedHashMap<String, Byte> flipped = new LinkedHashMap<>();
for (Map.Entry<Byte, String> entry : map.entrySet()) {
// if (entry.getValue().length() != 2) {
// throw new IllegalArgumentException(format("%02x -> '%s' l=%d",
// entry.getKey(), entry.getValue(), entry.getValue().length()));
// }
// if (ssTable.charAt(entry.getKey()) != EXT_CODE) {
// throw new IllegalArgumentException(format("%02x is not ext : \"%s\", is '%c'",
// entry.getKey(), entry.getValue(), ssTable.charAt(entry.getKey())));
// }
flipped.put(entry.getValue(), entry.getKey());
}
return Map.copyOf(flipped);
}
}