// Copyright (C) 2020-2024 Free Software Foundation, Inc. // This file is part of GCC. // GCC is free software; you can redistribute it and/or modify it under // the terms of the GNU General Public License as published by the Free // Software Foundation; either version 3, or (at your option) any later // version. // GCC is distributed in the hope that it will be useful, but WITHOUT ANY // WARRANTY; without even the implied warranty of MERCHANTABILITY or // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License // for more details. // You should have received a copy of the GNU General Public License // along with GCC; see the file COPYING3. If not see // . #include "rust-system.h" #include "rust-token.h" #include "rust-diagnostics.h" #include "rust-unicode.h" namespace Rust { // Hackily defined way to get token description for enum value using x-macros const char * get_token_description (TokenId id) { switch (id) { #define RS_TOKEN(name, descr) \ case name: \ return descr; #define RS_TOKEN_KEYWORD_2015(x, y) RS_TOKEN (x, y) #define RS_TOKEN_KEYWORD_2018 RS_TOKEN_KEYWORD_2015 RS_TOKEN_LIST #undef RS_TOKEN_KEYWORD_2015 #undef RS_TOKEN_KEYWORD_2018 #undef RS_TOKEN default: rust_unreachable (); } } /* Hackily defined way to get token description as a string for enum value using * x-macros */ const char * token_id_to_str (TokenId id) { switch (id) { #define RS_TOKEN(name, _) \ case name: \ return #name; #define RS_TOKEN_KEYWORD_2015(x, y) RS_TOKEN (x, y) #define RS_TOKEN_KEYWORD_2018 RS_TOKEN_KEYWORD_2015 RS_TOKEN_LIST #undef RS_TOKEN_KEYWORD_2015 #undef RS_TOKEN_KEYWORD_2018 #undef RS_TOKEN default: rust_unreachable (); } } /* checks if a token is a keyword */ bool token_id_is_keyword (TokenId id) { switch (id) { #define RS_TOKEN_KEYWORD_2015(name, _) case name: #define RS_TOKEN_KEYWORD_2018 RS_TOKEN_KEYWORD_2015 #define RS_TOKEN(a, b) RS_TOKEN_LIST return true; #undef RS_TOKEN_KEYWORD_2015 #undef RS_TOKEN_KEYWORD_2018 #undef RS_TOKEN default: return false; } } /* gets the string associated with a keyword */ const std::string & token_id_keyword_string (TokenId id) { switch (id) { #define RS_TOKEN_KEYWORD_2015(id, str_ptr) \ case id: { \ static const std::string str (str_ptr); \ return str; \ } \ rust_unreachable (); #define RS_TOKEN_KEYWORD_2018 RS_TOKEN_KEYWORD_2015 #define RS_TOKEN(a, b) RS_TOKEN_LIST #undef RS_TOKEN_KEYWORD_2015 #undef RS_TOKEN_KEYWORD_2018 #undef RS_TOKEN default: rust_unreachable (); } } const char * get_type_hint_string (PrimitiveCoreType type) { switch (type) { case CORETYPE_BOOL: return "bool"; case CORETYPE_CHAR: return "char"; case CORETYPE_STR: return "str"; // case CORETYPE_INT: case CORETYPE_ISIZE: return "isize"; // case CORETYPE_UINT: case CORETYPE_USIZE: return "usize"; case CORETYPE_F32: return "f32"; case CORETYPE_F64: return "f64"; case CORETYPE_I8: return "i8"; case CORETYPE_I16: return "i16"; case CORETYPE_I32: return "i32"; case CORETYPE_I64: return "i64"; case CORETYPE_I128: return "i128"; case CORETYPE_U8: return "u8"; case CORETYPE_U16: return "u16"; case CORETYPE_U32: return "u32"; case CORETYPE_U64: return "u64"; case CORETYPE_U128: return "u128"; case CORETYPE_PURE_DECIMAL: return "pure_decimal"; case CORETYPE_UNKNOWN: default: return "unknown"; } } const char * Token::get_type_hint_str () const { return get_type_hint_string (type_hint); } std::string nfc_normalize_token_string (location_t loc, TokenId id, const std::string &str) { if (id == IDENTIFIER || id == LIFETIME) { tl::optional ustring = Utf8String::make_utf8_string (str); if (ustring.has_value ()) return ustring.value ().nfc_normalize ().as_string (); else rust_internal_error_at (loc, "identifier '%s' is not a valid UTF-8 string", str.c_str ()); } else return str; } const std::string & Token::get_str () const { if (token_id_is_keyword (token_id)) return token_id_keyword_string (token_id); // FIXME: attempt to return null again // gcc_assert(str != NULL); // HACK: allow referencing an empty string static const std::string empty = ""; if (str == NULL) { rust_error_at (get_locus (), "attempted to get string for %<%s%>, which has no string. " "returning empty string instead", get_token_description ()); return empty; } return *str; } namespace { enum class Context { String, Char }; const std::map matches = { {'\t', "\\t"}, {'\n', "\\n"}, {'\r', "\\r"}, {'\0', "\\0"}, {'\\', "\\\\"}, {'\v', "\\v"}, }; std::string escape_special_chars (const std::string &source, Context ctx) { std::stringstream stream; decltype (matches)::const_iterator result; for (char c : source) { // FIXME: #2411 Also replace escaped unicode values and \x digits if ((result = matches.find (c)) != matches.end ()) stream << result->second; else if (c == '\'' && ctx == Context::Char) stream << "\\'"; else if (c == '"' && ctx == Context::String) stream << "\\\""; else stream << c; } return stream.str (); } } // namespace std::string Token::as_string () const { if (should_have_str ()) { switch (get_id ()) { case STRING_LITERAL: return "\"" + escape_special_chars (get_str (), Context::String) + "\""; case BYTE_STRING_LITERAL: return "b\"" + escape_special_chars (get_str (), Context::String) + "\""; case CHAR_LITERAL: return "'" + escape_special_chars (get_str (), Context::Char) + "'"; case BYTE_CHAR_LITERAL: return "b'" + escape_special_chars (get_str (), Context::Char) + "'"; case LIFETIME: return "'" + get_str (); case SCOPE_RESOLUTION: return "::"; case INT_LITERAL: if (get_type_hint () == CORETYPE_UNKNOWN) return get_str (); else return get_str () + get_type_hint_str (); case FLOAT_LITERAL: if (get_type_hint () == CORETYPE_UNKNOWN) return get_str (); else return get_str () + get_type_hint_str (); default: return get_str (); } } else { return get_token_description (); } } } // namespace Rust