MoreCharsetsProvider.java
- /*
- * Copyright (c) 2020, Stein Eldar Johnsen
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
- package net.morimekta.strings.enc;
- import java.nio.charset.Charset;
- import java.nio.charset.spi.CharsetProvider;
- import java.util.Iterator;
- import java.util.List;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- import static net.morimekta.strings.enc.GSMNationalLanguageIdentifier.forIso639;
- /**
- * Charset provider for GSM, UCS2, T.61 and TBCD charsets.
- */
- public class MoreCharsetsProvider extends CharsetProvider {
- /**
- * Create the charset provider.
- */
- public MoreCharsetsProvider() {}
- private static final List<Charset> CHARSETS = List.of(
- T61Charset.T61,
- TBCDCharset.TBCD,
- TBCDCharset.TBCD_ODD,
- GSMCharset.UCS2,
- GSMCharset.GSM,
- GSMCharset.GSM_Turkish,
- GSMCharset.GSM_Spanish,
- GSMCharset.GSM_Portuguese,
- GSMCharset.GSM_Bengali,
- GSMCharset.GSM_Gujarati,
- GSMCharset.GSM_Hindi,
- GSMCharset.GSM_Kannada,
- GSMCharset.GSM_Malayalam,
- GSMCharset.GSM_Oriya,
- GSMCharset.GSM_Punjabi,
- GSMCharset.GSM_Tamil,
- GSMCharset.GSM_Telugu,
- GSMCharset.GSM_Urdu);
- @Override
- public Iterator<Charset> charsets() {
- return CHARSETS.iterator();
- }
- @Override
- public Charset charsetForName(String name) {
- switch (name) {
- case "T.61":
- case "CP1036":
- case "IBM-01036":
- return T61Charset.T61;
- case "BCD":
- case "TBCD":
- return TBCDCharset.TBCD;
- case "BCD-odd":
- case "TBCD-odd":
- return TBCDCharset.TBCD_ODD;
- case "UCS2":
- return GSMCharset.UCS2;
- case "GSM":
- case "3GPP-23.038":
- return GSMCharset.GSM;
- case "GSM-tr":
- case "3GPP-23.038-tr+tr":
- return GSMCharset.GSM_Turkish;
- case "GSM-es":
- case "3GPP-23.038+es":
- return GSMCharset.GSM_Spanish;
- case "GSM-pt":
- case "3GPP-23.038-pt+pt":
- return GSMCharset.GSM_Portuguese;
- case "GSM-bn":
- case "3GPP-23.038-be+be":
- return GSMCharset.GSM_Bengali;
- case "GSM-gu":
- case "3GPP-23.038-gu+gu":
- return GSMCharset.GSM_Gujarati;
- case "GSM-hi":
- case "3GPP-23.038-hi+hi":
- return GSMCharset.GSM_Hindi;
- case "GSM-kn":
- case "3GPP-23.038-kn+kn":
- return GSMCharset.GSM_Kannada;
- case "GSM-ml":
- case "3GPP-23.038-ml+ml":
- return GSMCharset.GSM_Malayalam;
- case "GSM-or":
- case "3GPP-23.038-or+or":
- return GSMCharset.GSM_Oriya;
- case "GSM-pa":
- case "3GPP-23.038-pa+pa":
- return GSMCharset.GSM_Punjabi;
- case "GSM-ta":
- case "3GPP-23.038-ta+ta":
- return GSMCharset.GSM_Tamil;
- case "GSM-te":
- case "3GPP-23.038-te+te":
- return GSMCharset.GSM_Telugu;
- case "GSM-ur":
- case "3GPP-23.038-ur+ur":
- return GSMCharset.GSM_Urdu;
- default:
- if (name.startsWith("3GPP-23.038")) {
- Matcher matcher = LOCALE_3GPP_PATTERN.matcher(name);
- if (matcher.matches()) {
- var locking = forIso639(matcher.group("locking"));
- var shift = forIso639(matcher.group("shift"));
- return GSMCharset.forNationalLanguageIdentifier(locking, shift);
- }
- }
- return null;
- }
- }
- private static final Pattern LOCALE_3GPP_PATTERN = Pattern.compile(
- "3GPP-23\\.038" +
- "(-(?<locking>[a-z][a-z][a-z]?))?" +
- "(\\+(?<shift>[a-z][a-z][a-z]?))?");
- }