// -*- C++ -*- // Copyright The GNU Toolchain Authors. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the // terms of the GNU General Public License as published by the // Free Software Foundation; either version 3, or (at your option) // any later version. // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // Under Section 7 of GPL version 3, you are granted additional // permissions described in the GCC Runtime Library Exception, version // 3.1, as published by the Free Software Foundation. // You should have received a copy of the GNU General Public License and // a copy of the GCC Runtime Library Exception along with this program; // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see // . /** @file include/text_encoding * This is a Standard C++ Library header. */ #ifndef _GLIBCXX_TEXT_ENCODING #define _GLIBCXX_TEXT_ENCODING #pragma GCC system_header #include #define __glibcxx_want_text_encoding #include #ifdef __cpp_lib_text_encoding #include #include #include // hash #include // view_interface #include // __charset_alias_match #include // __int_traits namespace std _GLIBCXX_VISIBILITY(default) { _GLIBCXX_BEGIN_NAMESPACE_VERSION /** * @brief An interface for accessing the IANA Character Sets registry. * @ingroup locales * @since C++23 */ struct text_encoding { private: struct _Rep { using id = __INT_LEAST32_TYPE__; id _M_id; const char* _M_name; friend constexpr bool operator<(const _Rep& __r, id __m) noexcept { return __r._M_id < __m; } friend constexpr bool operator==(const _Rep& __r, string_view __name) noexcept { return __r._M_name == __name; } }; public: static constexpr size_t max_name_length = 63; enum class id : _Rep::id { other = 1, unknown = 2, ASCII = 3, ISOLatin1 = 4, ISOLatin2 = 5, ISOLatin3 = 6, ISOLatin4 = 7, ISOLatinCyrillic = 8, ISOLatinArabic = 9, ISOLatinGreek = 10, ISOLatinHebrew = 11, ISOLatin5 = 12, ISOLatin6 = 13, ISOTextComm = 14, HalfWidthKatakana = 15, JISEncoding = 16, ShiftJIS = 17, EUCPkdFmtJapanese = 18, EUCFixWidJapanese = 19, ISO4UnitedKingdom = 20, ISO11SwedishForNames = 21, ISO15Italian = 22, ISO17Spanish = 23, ISO21German = 24, ISO60DanishNorwegian = 25, ISO69French = 26, ISO10646UTF1 = 27, ISO646basic1983 = 28, INVARIANT = 29, ISO2IntlRefVersion = 30, NATSSEFI = 31, NATSSEFIADD = 32, ISO10Swedish = 35, KSC56011987 = 36, ISO2022KR = 37, EUCKR = 38, ISO2022JP = 39, ISO2022JP2 = 40, ISO13JISC6220jp = 41, ISO14JISC6220ro = 42, ISO16Portuguese = 43, ISO18Greek7Old = 44, ISO19LatinGreek = 45, ISO25French = 46, ISO27LatinGreek1 = 47, ISO5427Cyrillic = 48, ISO42JISC62261978 = 49, ISO47BSViewdata = 50, ISO49INIS = 51, ISO50INIS8 = 52, ISO51INISCyrillic = 53, ISO54271981 = 54, ISO5428Greek = 55, ISO57GB1988 = 56, ISO58GB231280 = 57, ISO61Norwegian2 = 58, ISO70VideotexSupp1 = 59, ISO84Portuguese2 = 60, ISO85Spanish2 = 61, ISO86Hungarian = 62, ISO87JISX0208 = 63, ISO88Greek7 = 64, ISO89ASMO449 = 65, ISO90 = 66, ISO91JISC62291984a = 67, ISO92JISC62991984b = 68, ISO93JIS62291984badd = 69, ISO94JIS62291984hand = 70, ISO95JIS62291984handadd = 71, ISO96JISC62291984kana = 72, ISO2033 = 73, ISO99NAPLPS = 74, ISO102T617bit = 75, ISO103T618bit = 76, ISO111ECMACyrillic = 77, ISO121Canadian1 = 78, ISO122Canadian2 = 79, ISO123CSAZ24341985gr = 80, ISO88596E = 81, ISO88596I = 82, ISO128T101G2 = 83, ISO88598E = 84, ISO88598I = 85, ISO139CSN369103 = 86, ISO141JUSIB1002 = 87, ISO143IECP271 = 88, ISO146Serbian = 89, ISO147Macedonian = 90, ISO150 = 91, ISO151Cuba = 92, ISO6937Add = 93, ISO153GOST1976874 = 94, ISO8859Supp = 95, ISO10367Box = 96, ISO158Lap = 97, ISO159JISX02121990 = 98, ISO646Danish = 99, USDK = 100, DKUS = 101, KSC5636 = 102, Unicode11UTF7 = 103, ISO2022CN = 104, ISO2022CNEXT = 105, UTF8 = 106, ISO885913 = 109, ISO885914 = 110, ISO885915 = 111, ISO885916 = 112, GBK = 113, GB18030 = 114, OSDEBCDICDF0415 = 115, OSDEBCDICDF03IRV = 116, OSDEBCDICDF041 = 117, ISO115481 = 118, KZ1048 = 119, UCS2 = 1000, UCS4 = 1001, UnicodeASCII = 1002, UnicodeLatin1 = 1003, UnicodeJapanese = 1004, UnicodeIBM1261 = 1005, UnicodeIBM1268 = 1006, UnicodeIBM1276 = 1007, UnicodeIBM1264 = 1008, UnicodeIBM1265 = 1009, Unicode11 = 1010, SCSU = 1011, UTF7 = 1012, UTF16BE = 1013, UTF16LE = 1014, UTF16 = 1015, CESU8 = 1016, UTF32 = 1017, UTF32BE = 1018, UTF32LE = 1019, BOCU1 = 1020, UTF7IMAP = 1021, Windows30Latin1 = 2000, Windows31Latin1 = 2001, Windows31Latin2 = 2002, Windows31Latin5 = 2003, HPRoman8 = 2004, AdobeStandardEncoding = 2005, VenturaUS = 2006, VenturaInternational = 2007, DECMCS = 2008, PC850Multilingual = 2009, PC8DanishNorwegian = 2012, PC862LatinHebrew = 2013, PC8Turkish = 2014, IBMSymbols = 2015, IBMThai = 2016, HPLegal = 2017, HPPiFont = 2018, HPMath8 = 2019, HPPSMath = 2020, HPDesktop = 2021, VenturaMath = 2022, MicrosoftPublishing = 2023, Windows31J = 2024, GB2312 = 2025, Big5 = 2026, Macintosh = 2027, IBM037 = 2028, IBM038 = 2029, IBM273 = 2030, IBM274 = 2031, IBM275 = 2032, IBM277 = 2033, IBM278 = 2034, IBM280 = 2035, IBM281 = 2036, IBM284 = 2037, IBM285 = 2038, IBM290 = 2039, IBM297 = 2040, IBM420 = 2041, IBM423 = 2042, IBM424 = 2043, PC8CodePage437 = 2011, IBM500 = 2044, IBM851 = 2045, PCp852 = 2010, IBM855 = 2046, IBM857 = 2047, IBM860 = 2048, IBM861 = 2049, IBM863 = 2050, IBM864 = 2051, IBM865 = 2052, IBM868 = 2053, IBM869 = 2054, IBM870 = 2055, IBM871 = 2056, IBM880 = 2057, IBM891 = 2058, IBM903 = 2059, IBM904 = 2060, IBM905 = 2061, IBM918 = 2062, IBM1026 = 2063, IBMEBCDICATDE = 2064, EBCDICATDEA = 2065, EBCDICCAFR = 2066, EBCDICDKNO = 2067, EBCDICDKNOA = 2068, EBCDICFISE = 2069, EBCDICFISEA = 2070, EBCDICFR = 2071, EBCDICIT = 2072, EBCDICPT = 2073, EBCDICES = 2074, EBCDICESA = 2075, EBCDICESS = 2076, EBCDICUK = 2077, EBCDICUS = 2078, Unknown8BiT = 2079, Mnemonic = 2080, Mnem = 2081, VISCII = 2082, VIQR = 2083, KOI8R = 2084, HZGB2312 = 2085, IBM866 = 2086, PC775Baltic = 2087, KOI8U = 2088, IBM00858 = 2089, IBM00924 = 2090, IBM01140 = 2091, IBM01141 = 2092, IBM01142 = 2093, IBM01143 = 2094, IBM01144 = 2095, IBM01145 = 2096, IBM01146 = 2097, IBM01147 = 2098, IBM01148 = 2099, IBM01149 = 2100, Big5HKSCS = 2101, IBM1047 = 2102, PTCP154 = 2103, Amiga1251 = 2104, KOI7switched = 2105, BRF = 2106, TSCII = 2107, CP51932 = 2108, windows874 = 2109, windows1250 = 2250, windows1251 = 2251, windows1252 = 2252, windows1253 = 2253, windows1254 = 2254, windows1255 = 2255, windows1256 = 2256, windows1257 = 2257, windows1258 = 2258, TIS620 = 2259, CP50220 = 2260 }; using enum id; constexpr text_encoding() = default; constexpr explicit text_encoding(string_view __enc) noexcept : _M_rep(_S_find_name(__enc)) { __enc.copy(_M_name, max_name_length); } // @pre i has the value of one of the enumerators of id. constexpr text_encoding(id __i) noexcept : _M_rep(_S_find_id(__i)) { if (string_view __name(_M_rep->_M_name); !__name.empty()) __name.copy(_M_name, max_name_length); } constexpr id mib() const noexcept { return id(_M_rep->_M_id); } constexpr const char* name() const noexcept { return _M_name; } struct aliases_view : ranges::view_interface { private: class _Iterator; struct _Sentinel { }; public: constexpr _Iterator begin() const noexcept; constexpr _Sentinel end() const noexcept { return {}; } private: friend struct text_encoding; constexpr explicit aliases_view(const _Rep* __r) : _M_begin(__r) { } const _Rep* _M_begin = nullptr; }; constexpr aliases_view aliases() const noexcept { return _M_rep->_M_name[0] ? aliases_view(_M_rep) : aliases_view{nullptr}; } friend constexpr bool operator==(const text_encoding& __a, const text_encoding& __b) noexcept { if (__a.mib() == id::other && __b.mib() == id::other) [[unlikely]] return _S_comp(__a._M_name, __b._M_name); else return __a.mib() == __b.mib(); } friend constexpr bool operator==(const text_encoding& __encoding, id __i) noexcept { return __encoding.mib() == __i; } #if __CHAR_BIT__ == 8 static consteval text_encoding literal() noexcept { #ifdef __GNUC_EXECUTION_CHARSET_NAME return text_encoding(__GNUC_EXECUTION_CHARSET_NAME); #elif defined __clang_literal_encoding__ return text_encoding(__clang_literal_encoding__); #else return text_encoding(); #endif } static text_encoding environment(); template static bool environment_is() { return text_encoding(_Id)._M_is_environment(); } #else static text_encoding literal() = delete; static text_encoding environment() = delete; template static bool environment_is() = delete; #endif private: const _Rep* _M_rep = _S_reps + 1; // id::unknown char _M_name[max_name_length + 1] = {0}; bool _M_is_environment() const; static inline constexpr _Rep _S_reps[] = { { 1, "" }, { 2, "" }, #define _GLIBCXX_GET_ENCODING_DATA #include #ifdef _GLIBCXX_GET_ENCODING_DATA # error "Invalid text_encoding data" #endif { 9999, nullptr }, // sentinel }; static constexpr bool _S_comp(string_view __a, string_view __b) { return __unicode::__charset_alias_match(__a, __b); } static constexpr const _Rep* _S_find_name(string_view __name) noexcept { #ifdef _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET // Optimize the common UTF-8 case to avoid a linear search through all // strings in the table using the _S_comp function. if (__name == "UTF-8") return _S_reps + 2 + _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET; #endif // The first two array elements (other and unknown) don't have names. // The last element is a sentinel that can never match anything. const auto __first = _S_reps + 2, __end = std::end(_S_reps) - 1; for (auto __r = __first; __r != __end; ++__r) if (_S_comp(__r->_M_name, __name)) { // Might have matched an alias. Find the first entry for this ID. const auto __id = __r->_M_id; while (__r[-1]._M_id == __id) --__r; return __r; } return _S_reps; // id::other } static constexpr const _Rep* _S_find_id(id __id) noexcept { const auto __i = (_Rep::id)__id; const auto __r = std::lower_bound(_S_reps, std::end(_S_reps) - 1, __i); if (__r->_M_id == __i) [[likely]] return __r; else { // Preconditions: i has the value of one of the enumerators of id. __glibcxx_assert(__r->_M_id == __i); return _S_reps + 1; // id::unknown } } }; template<> struct hash { size_t operator()(const text_encoding& __enc) const noexcept { return std::hash()(__enc.mib()); } }; class text_encoding::aliases_view::_Iterator { public: using value_type = const char*; using reference = const char*; using difference_type = int; constexpr _Iterator() = default; constexpr value_type operator*() const { if (_M_dereferenceable()) [[likely]] return _M_rep->_M_name; else { __glibcxx_assert(_M_dereferenceable()); return ""; } } constexpr _Iterator& operator++() { if (_M_dereferenceable()) [[likely]] ++_M_rep; else { __glibcxx_assert(_M_dereferenceable()); *this = _Iterator{}; } return *this; } constexpr _Iterator& operator--() { const bool __decrementable = _M_rep != nullptr && _M_rep[-1]._M_id == _M_id; if (__decrementable) [[likely]] --_M_rep; else { __glibcxx_assert(__decrementable); *this = _Iterator{}; } return *this; } constexpr _Iterator operator++(int) { auto __it = *this; ++*this; return __it; } constexpr _Iterator operator--(int) { auto __it = *this; --*this; return __it; } constexpr value_type operator[](difference_type __n) const { return *(*this + __n); } constexpr _Iterator& operator+=(difference_type __n) { if (_M_rep != nullptr) { if (__n > 0) { if (__n < (std::end(_S_reps) - _M_rep) && _M_rep[__n - 1]._M_id == _M_id) [[likely]] _M_rep += __n; else *this == _Iterator{}; } else if (__n < 0) { if (__n > (_S_reps - _M_rep) && _M_rep[__n]._M_id == _M_id) [[likely]] _M_rep += __n; else *this == _Iterator{}; } } if (__n != 0) __glibcxx_assert(_M_rep != nullptr); return *this; } constexpr _Iterator& operator-=(difference_type __n) { using _Traits = __gnu_cxx::__int_traits; if (__n == _Traits::__min) [[unlikely]] return operator+=(_Traits::__max); return operator+=(-__n); } constexpr difference_type operator-(const _Iterator& __i) const { if (_M_id == __i._M_id) return _M_rep - __i._M_rep; __glibcxx_assert(_M_id == __i._M_id); return __gnu_cxx::__int_traits::__max; } constexpr bool operator==(const _Iterator&) const = default; constexpr bool operator==(_Sentinel) const noexcept { return !_M_dereferenceable(); } constexpr strong_ordering operator<=>(const _Iterator& __i) const { __glibcxx_assert(_M_id == __i._M_id); return _M_rep <=> __i._M_rep; } friend constexpr _Iterator operator+(_Iterator __i, difference_type __n) { __i += __n; return __i; } friend constexpr _Iterator operator+(difference_type __n, _Iterator __i) { __i += __n; return __i; } friend constexpr _Iterator operator-(_Iterator __i, difference_type __n) { __i -= __n; return __i; } private: friend class text_encoding; constexpr explicit _Iterator(const _Rep* __r) noexcept : _M_rep(__r), _M_id(__r ? __r->_M_id : 0) { } constexpr bool _M_dereferenceable() const noexcept { return _M_rep != nullptr && _M_rep->_M_id == _M_id; } const _Rep* _M_rep = nullptr; _Rep::id _M_id = 0; }; constexpr auto text_encoding::aliases_view::begin() const noexcept -> _Iterator { return _Iterator(_M_begin); } namespace ranges { // Opt-in to borrowed_range concept template<> inline constexpr bool enable_borrowed_range = true; } _GLIBCXX_END_NAMESPACE_VERSION } // namespace std #endif // __cpp_lib_text_encoding #endif // _GLIBCXX_TEXT_ENCODING