GSMNationalLanguageIdentifier.java

/*
 * Copyright (c) 2020, Stein Eldar Johnsen
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package net.morimekta.strings.enc;

import java.util.Map;
import java.util.Set;

import static net.morimekta.strings.internal.GSMCharsetUtil.BENGALI_LS;
import static net.morimekta.strings.internal.GSMCharsetUtil.BENGALI_SS;
import static net.morimekta.strings.internal.GSMCharsetUtil.BENGALI_SX;
import static net.morimekta.strings.internal.GSMCharsetUtil.DEFAULT_BC;
import static net.morimekta.strings.internal.GSMCharsetUtil.DEFAULT_EX;
import static net.morimekta.strings.internal.GSMCharsetUtil.GUJARATI_LS;
import static net.morimekta.strings.internal.GSMCharsetUtil.GUJARATI_SS;
import static net.morimekta.strings.internal.GSMCharsetUtil.HINDI_LS;
import static net.morimekta.strings.internal.GSMCharsetUtil.HINDI_SS;
import static net.morimekta.strings.internal.GSMCharsetUtil.HINDI_SX;
import static net.morimekta.strings.internal.GSMCharsetUtil.KANNADA_LS;
import static net.morimekta.strings.internal.GSMCharsetUtil.KANNADA_SS;
import static net.morimekta.strings.internal.GSMCharsetUtil.MALAYALAM_LS;
import static net.morimekta.strings.internal.GSMCharsetUtil.MALAYALAM_SS;
import static net.morimekta.strings.internal.GSMCharsetUtil.ORIYA_LS;
import static net.morimekta.strings.internal.GSMCharsetUtil.ORIYA_SS;
import static net.morimekta.strings.internal.GSMCharsetUtil.ORIYA_SX;
import static net.morimekta.strings.internal.GSMCharsetUtil.PORTUGUESE_LS;
import static net.morimekta.strings.internal.GSMCharsetUtil.PORTUGUESE_SS;
import static net.morimekta.strings.internal.GSMCharsetUtil.PUNJABI_LS;
import static net.morimekta.strings.internal.GSMCharsetUtil.PUNJABI_LX;
import static net.morimekta.strings.internal.GSMCharsetUtil.PUNJABI_SS;
import static net.morimekta.strings.internal.GSMCharsetUtil.PUNJABI_SX;
import static net.morimekta.strings.internal.GSMCharsetUtil.SPANISH_SS;
import static net.morimekta.strings.internal.GSMCharsetUtil.TAMIL_LS;
import static net.morimekta.strings.internal.GSMCharsetUtil.TAMIL_SS;
import static net.morimekta.strings.internal.GSMCharsetUtil.TELUGU_LS;
import static net.morimekta.strings.internal.GSMCharsetUtil.TELUGU_SS;
import static net.morimekta.strings.internal.GSMCharsetUtil.TURKISH_LS;
import static net.morimekta.strings.internal.GSMCharsetUtil.TURKISH_SS;
import static net.morimekta.strings.internal.GSMCharsetUtil.URDU_LS;
import static net.morimekta.strings.internal.GSMCharsetUtil.URDU_SS;
import static net.morimekta.strings.internal.GSMCharsetUtil.firstChars;
import static net.morimekta.strings.internal.GSMCharsetUtil.flip;
import static net.morimekta.strings.internal.GSMCharsetUtil.reverse;

/**
 * National Language Identifier (NLI) as used by 3GPP TS 23.038 to select
 * the national locking shift and / or single shift table for GSM 7-bit
 * encoded text.
 * <p>
 * Each constant carries its numeric NLI code, the ISO 639 language codes
 * it is looked up by, and the character tables handed to it from
 * {@code GSMCharsetUtil}. The lookup structures are derived once in the
 * constructor and never modified afterwards.
 */
public enum GSMNationalLanguageIdentifier {
    /**
     * Used for most latin based languages. Has no ISO 639 codes assigned
     * (both are {@code null}); it is what {@link #forIso639(String)}
     * returns for a {@code null} argument.
     */
    Default(0, DEFAULT_BC, DEFAULT_EX, null, null),
    /**
     * Used for turkish.
     */
    Turkish(1, TURKISH_LS, TURKISH_SS, "tur", "tr"),
    /**
     * Used for spanish and spanish variants. Is declared without a basic
     * (locking shift) table, only a single shift table.
     */
    Spanish(2, null, SPANISH_SS, "spa", "es"),
    /**
     * Used for portuguese and portuguese variants.
     */
    Portuguese(3, PORTUGUESE_LS, PORTUGUESE_SS, "por", "pt"),
    /**
     * Used for bengali.
     */
    Bengali(4, BENGALI_LS, BENGALI_SS, BENGALI_SX, "ben", "bn"),
    /**
     * Used for gujarati.
     */
    Gujarati(5, GUJARATI_LS, GUJARATI_SS, "guj", "gu"),
    /**
     * Used for hindi or other sanskrit based languages.
     */
    Hindi(6, HINDI_LS, HINDI_SS, HINDI_SX, "hin", "hi"),
    /**
     * Used for kannada.
     */
    Kannada(7, KANNADA_LS, KANNADA_SS, "kan", "kn"),
    /**
     * Used for malayalam.
     */
    Malayalam(8, MALAYALAM_LS, MALAYALAM_SS, "mal", "ml"),
    /**
     * Used for oriya.
     */
    Oriya(9, ORIYA_LS, ORIYA_SS, ORIYA_SX, "ori", "or"),
    /**
     * Used for punjabi.
     */
    Punjabi(10, PUNJABI_LS, PUNJABI_LX, PUNJABI_SS, PUNJABI_SX, "pan", "pa"),
    /**
     * Used for tamil, both Indian and Sri-Lankan Tamil.
     */
    Tamil(11, TAMIL_LS, TAMIL_SS, "tam", "ta"),
    /**
     * Used for telugu.
     */
    Telugu(12, TELUGU_LS, TELUGU_SS, "tel", "te"),
    /**
     * Used for urdu and arabic languages.
     */
    Urdu(13, URDU_LS, URDU_SS, "urd", "ur"),
    // 14 - 127 reserved
    ;
    /**
     * The identifier ID for the language.
     */
    public final int    code;
    /**
     * The 2-letter ISO code for the language. Is {@code null} for
     * {@link #Default}.
     */
    public final String iso639_1;
    /**
     * The 3-letter ISO code for the language. Is {@code null} for
     * {@link #Default}.
     */
    public final String iso639_2;

    // Raw table strings exactly as handed to the constructor. 'basic' may be
    // null (see Spanish).
    final String               basic;
    final String               shift;
    // Lookup structures for the basic table: basicMap is reverse(basic),
    // basicStringCode is flip(basicCodeString) and basicExtended is
    // firstChars(basicCodeString); basicCodeString is an immutable copy of
    // the constructor argument.
    final Map<Character, Byte> basicMap;
    final Map<Byte, String>    basicCodeString;
    final Map<String, Byte>    basicStringCode;
    final Set<Character>       basicExtended;
    // Same set of lookup structures, derived from the shift table.
    final Map<Character, Byte> shiftMap;
    final Map<Byte, String>    shiftCodeString;
    final Map<String, Byte>    shiftStringCode;
    final Set<Character>       shiftExtended;

    /**
     * Language with plain basic and shift tables, and no extra
     * code-to-string entries for either of them.
     *
     * @param code     The numeric NLI code.
     * @param basic    The basic table string, may be null.
     * @param shift    The shift table string.
     * @param iso639_2 The 3-letter ISO 639 language code.
     * @param iso639_1 The 2-letter ISO 639 language code.
     */
    GSMNationalLanguageIdentifier(int code,
                                  String basic,
                                  String shift,
                                  String iso639_2,
                                  String iso639_1) {
        this(code, basic, shift, Map.of(), iso639_2, iso639_1);
    }

    /**
     * Language with extra code-to-string entries for the shift table only.
     *
     * @param code            The numeric NLI code.
     * @param basic           The basic table string, may be null.
     * @param shift           The shift table string.
     * @param shiftCodeString Extra code-to-string entries for the shift table.
     * @param iso639_2        The 3-letter ISO 639 language code.
     * @param iso639_1        The 2-letter ISO 639 language code.
     */
    GSMNationalLanguageIdentifier(int code,
                                  String basic,
                                  String shift,
                                  Map<Byte, String> shiftCodeString,
                                  String iso639_2,
                                  String iso639_1) {
        this(code, basic, Map.of(), shift, shiftCodeString, iso639_2, iso639_1);
    }

    /**
     * Language with extra code-to-string entries for both tables. All the
     * derived lookup structures are built here.
     *
     * @param code            The numeric NLI code.
     * @param basic           The basic table string, may be null.
     * @param basicCodeString Extra code-to-string entries for the basic table.
     * @param shift           The shift table string.
     * @param shiftCodeString Extra code-to-string entries for the shift table.
     * @param iso639_2        The 3-letter ISO 639 language code.
     * @param iso639_1        The 2-letter ISO 639 language code.
     */
    GSMNationalLanguageIdentifier(int code,
                                  String basic,
                                  Map<Byte, String> basicCodeString,
                                  String shift,
                                  Map<Byte, String> shiftCodeString,
                                  String iso639_2,
                                  String iso639_1) {
        this.code = code;
        this.basic = basic;
        this.shift = shift;
        this.basicMap = reverse(basic);
        this.basicCodeString = Map.copyOf(basicCodeString);
        this.basicStringCode = flip(basicCodeString);
        this.basicExtended = firstChars(basicCodeString);
        this.shiftMap = reverse(shift);
        this.shiftCodeString = Map.copyOf(shiftCodeString);
        this.shiftStringCode = flip(shiftCodeString);
        this.shiftExtended = firstChars(shiftCodeString);

        this.iso639_1 = iso639_1;
        this.iso639_2 = iso639_2;
    }

    /**
     * Get NLI for the given ISO-639 2 or 3-letter language code.
     * <p>
     * The comparison is exact (case sensitive). The non-ISO spellings
     * {@code "esp"}, {@code "pun"} and {@code "pn"} are still accepted
     * for backward compatibility with earlier versions of this enum.
     *
     * @param iso ISO639 code (2 or 3-letter) for the language. If null,
     *            {@link #Default} is returned.
     * @return The language identifier.
     * @throws IllegalArgumentException If no language for iso code.
     */
    public static GSMNationalLanguageIdentifier forIso639(String iso) {
        if (iso == null) {
            return Default;
        }
        final String lookup = canonicalIso639(iso);
        for (GSMNationalLanguageIdentifier nli : values()) {
            // Default has null ISO codes, so it can never match here.
            if (lookup.equals(nli.iso639_1) || lookup.equals(nli.iso639_2)) {
                return nli;
            }
        }
        throw new IllegalArgumentException("No language id for " + iso + " for 3GPP-23.038");
    }

    /**
     * Get NLI for the given numeric code value.
     *
     * @param code The numeric NLI ID number (code) for the language.
     * @return The language identifier.
     * @throws IllegalArgumentException If no language for ID.
     */
    public static GSMNationalLanguageIdentifier forCode(int code) {
        for (GSMNationalLanguageIdentifier nli : values()) {
            if (nli.code == code) {
                return nli;
            }
        }
        throw new IllegalArgumentException("No language identifier for code " + code);
    }

    /**
     * Map the incorrect language codes this enum used to declare onto the
     * proper ISO 639 codes, so callers passing the old values keep working.
     * Any other value is returned unchanged.
     */
    private static String canonicalIso639(String iso) {
        switch (iso) {
            case "esp":
                return "spa";
            case "pun":
                return "pan";
            case "pn":
                return "pa";
            default:
                return iso;
        }
    }
}