// Copyright (C) 2020-2024 Free Software Foundation, Inc. // This file is part of GCC. // GCC is free software; you can redistribute it and/or modify it under // the terms of the GNU General Public License as published by the Free // Software Foundation; either version 3, or (at your option) any later // version. // GCC is distributed in the hope that it will be useful, but WITHOUT ANY // WARRANTY; without even the implied warranty of MERCHANTABILITY or // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License // for more details. // You should have received a copy of the GNU General Public License // along with GCC; see the file COPYING3. If not see // . #ifndef RUST_TOKEN_H #define RUST_TOKEN_H #include "rust-system.h" #include "rust-linemap.h" #include "rust-make-unique.h" #include "rust-unicode.h" namespace Rust { // "Primitive core types" in Rust - the different int and float types, as well // as some others enum PrimitiveCoreType { CORETYPE_UNKNOWN, // named primitives CORETYPE_BOOL, CORETYPE_CHAR, CORETYPE_STR, // okay technically int and uint are arch-dependent (pointer size) CORETYPE_INT, CORETYPE_UINT, // numbered number primitives CORETYPE_F32, CORETYPE_F64, CORETYPE_I8, CORETYPE_I16, CORETYPE_I32, CORETYPE_I64, CORETYPE_I128, CORETYPE_U8, CORETYPE_U16, CORETYPE_U32, CORETYPE_U64, CORETYPE_U128, // Pure decimals are used for tuple index. // Also means there is no type hint. CORETYPE_PURE_DECIMAL, // arch-dependent pointer sizes CORETYPE_ISIZE = CORETYPE_INT, CORETYPE_USIZE = CORETYPE_UINT }; // RS_TOKEN(name, description) // RS_TOKEN_KEYWORD_{2015,2018}(name, identifier) // Keep RS_TOKEN_KEYWORD sorted /* note that abstract, async, become, box, do, final, macro, override, priv, * try, typeof, unsized, virtual, and yield are unused */ #define RS_TOKEN_LIST \ RS_TOKEN (FIRST_TOKEN, "") \ RS_TOKEN (END_OF_FILE, "end of file") \ RS_TOKEN (EXCLAM, "!") \ RS_TOKEN (NOT_EQUAL, "!=") \ RS_TOKEN (PERCENT, "%") \ RS_TOKEN (PERCENT_EQ, "%=") \ RS_TOKEN (AMP, "&") \ RS_TOKEN (AMP_EQ, "&=") \ RS_TOKEN (LOGICAL_AND, "&&") \ RS_TOKEN (ASTERISK, "*") \ RS_TOKEN (ASTERISK_EQ, "*=") \ RS_TOKEN (PLUS, "+") \ RS_TOKEN (PLUS_EQ, "+=") \ RS_TOKEN (COMMA, ",") \ RS_TOKEN (MINUS, "-") \ RS_TOKEN (MINUS_EQ, "-=") \ RS_TOKEN (RETURN_TYPE, "->") \ RS_TOKEN (DOT, ".") \ RS_TOKEN (DOT_DOT, "..") \ RS_TOKEN (DOT_DOT_EQ, "..=") \ RS_TOKEN (ELLIPSIS, "...") \ RS_TOKEN (DIV, "/") \ RS_TOKEN (DIV_EQ, "/=") \ RS_TOKEN (COLON, ":") \ RS_TOKEN (SEMICOLON, ";") \ RS_TOKEN (LEFT_SHIFT, "<<") \ RS_TOKEN (LEFT_SHIFT_EQ, "<<=") \ RS_TOKEN (LEFT_ANGLE, "<") \ RS_TOKEN (LESS_OR_EQUAL, "<=") \ RS_TOKEN (EQUAL, "=") \ RS_TOKEN (EQUAL_EQUAL, "==") \ RS_TOKEN (MATCH_ARROW, "=>") \ RS_TOKEN (RIGHT_ANGLE, ">") \ RS_TOKEN (GREATER_OR_EQUAL, ">=") \ RS_TOKEN (RIGHT_SHIFT, ">>") \ RS_TOKEN (RIGHT_SHIFT_EQ, ">>=") \ RS_TOKEN (PATTERN_BIND, "@") \ RS_TOKEN (TILDE, "~") \ RS_TOKEN (BACKSLASH, "\\") \ RS_TOKEN (BACKTICK, "`") \ RS_TOKEN (CARET, "^") \ RS_TOKEN (CARET_EQ, "^=") \ RS_TOKEN (PIPE, "|") \ RS_TOKEN (PIPE_EQ, "|=") \ RS_TOKEN (OR, "||") \ RS_TOKEN (QUESTION_MARK, "?") \ RS_TOKEN (HASH, "#") \ /* from here on, dodgy and may not be correct. not operators and may be \ * symbols */ \ /* RS_TOKEN(SPACE, " ") probably too dodgy */ \ /* RS_TOKEN(NEWLINE, "\n")*/ \ RS_TOKEN (SCOPE_RESOLUTION, "::") /* dodgy */ \ RS_TOKEN (SINGLE_QUOTE, "'") /* should i differentiate from lifetime? */ \ RS_TOKEN (DOUBLE_QUOTE, "\"") \ RS_TOKEN (IDENTIFIER, "identifier") \ RS_TOKEN (INT_LITERAL, \ "integer literal") /* do different int and float types need \ different literal types? */ \ RS_TOKEN (FLOAT_LITERAL, "float literal") \ RS_TOKEN (STRING_LITERAL, "string literal") \ RS_TOKEN (CHAR_LITERAL, "character literal") \ RS_TOKEN (BYTE_STRING_LITERAL, "byte string literal") \ RS_TOKEN (BYTE_CHAR_LITERAL, "byte character literal") \ RS_TOKEN (LIFETIME, "lifetime") /* TODO: improve token type */ \ /* Have "interpolated" tokens (whatever that means)? identifer, path, type, \ * pattern, */ \ /* expression, statement, block, meta, item in mrustc (but not directly in \ * lexer). */ \ RS_TOKEN (LEFT_PAREN, "(") \ RS_TOKEN (RIGHT_PAREN, ")") \ RS_TOKEN (LEFT_CURLY, "{") \ RS_TOKEN (RIGHT_CURLY, "}") \ RS_TOKEN (LEFT_SQUARE, "[") \ RS_TOKEN (RIGHT_SQUARE, "]") \ /* Macros */ \ RS_TOKEN (DOLLAR_SIGN, "$") \ /* Doc Comments */ \ RS_TOKEN (INNER_DOC_COMMENT, "#![doc]") \ RS_TOKEN (OUTER_DOC_COMMENT, "#[doc]") \ RS_TOKEN_KEYWORD_2015 (ABSTRACT, "abstract") /* unused */ \ RS_TOKEN_KEYWORD_2015 (AS, "as") \ RS_TOKEN_KEYWORD_2018 (ASYNC, "async") /* unused */ \ RS_TOKEN_KEYWORD_2015 (AUTO, "auto") \ RS_TOKEN_KEYWORD_2018 (AWAIT, "await") \ RS_TOKEN_KEYWORD_2015 (BECOME, "become") /* unused */ \ RS_TOKEN_KEYWORD_2015 (BOX, "box") /* unused */ \ RS_TOKEN_KEYWORD_2015 (BREAK, "break") \ RS_TOKEN_KEYWORD_2015 (CONST, "const") \ RS_TOKEN_KEYWORD_2015 (CONTINUE, "continue") \ RS_TOKEN_KEYWORD_2015 (CRATE, "crate") \ RS_TOKEN_KEYWORD_2015 (DO, "do") /* unused */ \ RS_TOKEN_KEYWORD_2018 (DYN, "dyn") \ RS_TOKEN_KEYWORD_2015 (ELSE, "else") \ RS_TOKEN_KEYWORD_2015 (ENUM_KW, "enum") \ RS_TOKEN_KEYWORD_2015 (EXTERN_KW, "extern") \ RS_TOKEN_KEYWORD_2015 (FALSE_LITERAL, "false") \ RS_TOKEN_KEYWORD_2015 (FINAL_KW, "final") /* unused */ \ RS_TOKEN_KEYWORD_2015 (FN_KW, "fn") \ RS_TOKEN_KEYWORD_2015 (FOR, "for") \ RS_TOKEN_KEYWORD_2015 (IF, "if") \ RS_TOKEN_KEYWORD_2015 (IMPL, "impl") \ RS_TOKEN_KEYWORD_2015 (IN, "in") \ RS_TOKEN_KEYWORD_2015 (LET, "let") \ RS_TOKEN_KEYWORD_2015 (LOOP, "loop") \ RS_TOKEN_KEYWORD_2015 (MACRO, "macro") \ RS_TOKEN_KEYWORD_2015 (MATCH_KW, "match") \ RS_TOKEN_KEYWORD_2015 (MOD, "mod") \ RS_TOKEN_KEYWORD_2015 (MOVE, "move") \ RS_TOKEN_KEYWORD_2015 (MUT, "mut") \ RS_TOKEN_KEYWORD_2015 (OVERRIDE_KW, "override") /* unused */ \ RS_TOKEN_KEYWORD_2015 (PRIV, "priv") /* unused */ \ RS_TOKEN_KEYWORD_2015 (PUB, "pub") \ RS_TOKEN_KEYWORD_2015 (REF, "ref") \ RS_TOKEN_KEYWORD_2015 (RETURN_KW, "return") \ RS_TOKEN_KEYWORD_2015 ( \ SELF_ALIAS, "Self") /* mrustc does not treat this as a reserved word*/ \ RS_TOKEN_KEYWORD_2015 (SELF, "self") \ RS_TOKEN_KEYWORD_2015 (STATIC_KW, "static") \ RS_TOKEN_KEYWORD_2015 (STRUCT_KW, "struct") \ RS_TOKEN_KEYWORD_2015 (SUPER, "super") \ RS_TOKEN_KEYWORD_2015 (TRAIT, "trait") \ RS_TOKEN_KEYWORD_2015 (TRUE_LITERAL, "true") \ RS_TOKEN_KEYWORD_2015 (TRY, "try") /* unused */ \ RS_TOKEN_KEYWORD_2015 (TYPE, "type") \ RS_TOKEN_KEYWORD_2015 (TYPEOF, "typeof") /* unused */ \ RS_TOKEN_KEYWORD_2015 (UNDERSCORE, "_") \ RS_TOKEN_KEYWORD_2015 (UNSAFE, "unsafe") \ RS_TOKEN_KEYWORD_2015 (UNSIZED, "unsized") /* unused */ \ RS_TOKEN_KEYWORD_2015 (USE, "use") \ RS_TOKEN_KEYWORD_2015 (VIRTUAL, "virtual") /* unused */ \ RS_TOKEN_KEYWORD_2015 (WHERE, "where") \ RS_TOKEN_KEYWORD_2015 (WHILE, "while") \ RS_TOKEN_KEYWORD_2015 (YIELD, "yield") /* unused */ \ RS_TOKEN (LAST_TOKEN, "") // Contains all token types. Crappy implementation via x-macros. enum TokenId { #define RS_TOKEN(name, _) name, #define RS_TOKEN_KEYWORD_2015(x, y) RS_TOKEN (x, y) #define RS_TOKEN_KEYWORD_2018 RS_TOKEN_KEYWORD_2015 RS_TOKEN_LIST #undef RS_TOKEN_KEYWORD_2015 #undef RS_TOKEN_KEYWORD_2018 #undef RS_TOKEN }; // dodgy "TokenPtr" declaration with Token forward declaration class Token; // A smart pointer (shared_ptr) to Token. typedef std::shared_ptr TokenPtr; // A smart pointer (shared_ptr) to a constant Token. typedef std::shared_ptr const_TokenPtr; // Hackily defined way to get token description for enum value using x-macros const char * get_token_description (TokenId id); /* Hackily defined way to get token description as a string for enum value using * x-macros */ const char * token_id_to_str (TokenId id); /* checks if a token is a keyword */ bool token_id_is_keyword (TokenId id); /* gets the string associated with a keyword */ const std::string & token_id_keyword_string (TokenId id); // Get type hint description as a string. const char * get_type_hint_string (PrimitiveCoreType type); /* Normalize string if a token is a identifier */ std::string nfc_normalize_token_string (location_t loc, TokenId id, const std::string &str); // Represents a single token. Create using factory static methods. class Token { private: // Token kind. TokenId token_id; // Token location. location_t locus; // Associated text (if any) of token. std::unique_ptr str; // TODO: maybe remove issues and just store std::string as value? /* Type hint for token based on lexer data (e.g. type suffix). Does not exist * for most tokens. */ PrimitiveCoreType type_hint; // Token constructor from token id and location. Has a null string. Token (TokenId token_id, location_t location) : token_id (token_id), locus (location), str (nullptr), type_hint (CORETYPE_UNKNOWN) {} // Token constructor from token id, location, and a string. Token (TokenId token_id, location_t location, std::string &¶mStr) : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN) { // Normalize identifier tokens str = Rust::make_unique ( nfc_normalize_token_string (location, token_id, paramStr)); } // Token constructor from token id, location, and a char. Token (TokenId token_id, location_t location, char paramChar) : token_id (token_id), locus (location), str (new std::string (1, paramChar)), type_hint (CORETYPE_UNKNOWN) { // Do not need to normalize 1byte char } // Token constructor from token id, location, and a "codepoint". Token (TokenId token_id, location_t location, Codepoint paramCodepoint) : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN) { // Normalize identifier tokens str = Rust::make_unique ( nfc_normalize_token_string (location, token_id, paramCodepoint.as_string ())); } // Token constructor from token id, location, a string, and type hint. Token (TokenId token_id, location_t location, std::string &¶mStr, PrimitiveCoreType parType) : token_id (token_id), locus (location), type_hint (parType) { // Normalize identifier tokens str = Rust::make_unique ( nfc_normalize_token_string (location, token_id, paramStr)); } public: // No default constructor. Token () = delete; // Do not copy/assign tokens. Token (const Token &) = delete; Token &operator= (const Token &) = delete; // Allow moving tokens. Token (Token &&other) = default; Token &operator= (Token &&other) = default; ~Token () = default; /* TODO: make_shared (which saves a heap allocation) does not work with the * private constructor */ // Makes and returns a new TokenPtr (with null string). static TokenPtr make (TokenId token_id, location_t locus) { // return std::make_shared (token_id, locus); return TokenPtr (new Token (token_id, locus)); } // Makes and returns a new TokenPtr of type IDENTIFIER. static TokenPtr make_identifier (location_t locus, std::string &&str) { // return std::make_shared (IDENTIFIER, locus, str); return TokenPtr (new Token (IDENTIFIER, locus, std::move (str))); } // Makes and returns a new TokenPtr of type INT_LITERAL. static TokenPtr make_int (location_t locus, std::string &&str, PrimitiveCoreType type_hint = CORETYPE_UNKNOWN) { // return std::make_shared (INT_LITERAL, locus, str, type_hint); return TokenPtr ( new Token (INT_LITERAL, locus, std::move (str), type_hint)); } // Makes and returns a new TokenPtr of type FLOAT_LITERAL. static TokenPtr make_float (location_t locus, std::string &&str, PrimitiveCoreType type_hint = CORETYPE_UNKNOWN) { // return std::make_shared (FLOAT_LITERAL, locus, str, type_hint); return TokenPtr ( new Token (FLOAT_LITERAL, locus, std::move (str), type_hint)); } // Makes and returns a new TokenPtr of type STRING_LITERAL. static TokenPtr make_string (location_t locus, std::string &&str) { // return std::make_shared (STRING_LITERAL, locus, str, // CORETYPE_STR); return TokenPtr ( new Token (STRING_LITERAL, locus, std::move (str), CORETYPE_STR)); } // Makes and returns a new TokenPtr of type CHAR_LITERAL. static TokenPtr make_char (location_t locus, Codepoint char_lit) { // return std::make_shared (CHAR_LITERAL, locus, char_lit); return TokenPtr (new Token (CHAR_LITERAL, locus, char_lit)); } // Makes and returns a new TokenPtr of type BYTE_CHAR_LITERAL. static TokenPtr make_byte_char (location_t locus, char byte_char) { // return std::make_shared (BYTE_CHAR_LITERAL, locus, byte_char); return TokenPtr (new Token (BYTE_CHAR_LITERAL, locus, byte_char)); } // Makes and returns a new TokenPtr of type BYTE_STRING_LITERAL (fix). static TokenPtr make_byte_string (location_t locus, std::string &&str) { // return std::make_shared (BYTE_STRING_LITERAL, locus, str); return TokenPtr (new Token (BYTE_STRING_LITERAL, locus, std::move (str))); } // Makes and returns a new TokenPtr of type INNER_DOC_COMMENT. static TokenPtr make_inner_doc_comment (location_t locus, std::string &&str) { return TokenPtr (new Token (INNER_DOC_COMMENT, locus, std::move (str))); } // Makes and returns a new TokenPtr of type OUTER_DOC_COMMENT. static TokenPtr make_outer_doc_comment (location_t locus, std::string &&str) { return TokenPtr (new Token (OUTER_DOC_COMMENT, locus, std::move (str))); } // Makes and returns a new TokenPtr of type LIFETIME. static TokenPtr make_lifetime (location_t locus, std::string &&str) { // return std::make_shared (LIFETIME, locus, str); return TokenPtr (new Token (LIFETIME, locus, std::move (str))); } // Gets id of the token. TokenId get_id () const { return token_id; } // Gets location of the token. location_t get_locus () const { return locus; } // Set location of the token. void set_locus (location_t locus) { this->locus = locus; } // Gets string description of the token. const std::string & get_str () const; /*{ // FIXME: put in header again when fix null problem //gcc_assert(str != nullptr); if (str == nullptr) { error_at(get_locus(), "attempted to get string for '%s', which has no string. returning empty string instead.", get_token_description()); return ""; } return *str; }*/ // Gets token's type hint info. PrimitiveCoreType get_type_hint () const { return type_hint == CORETYPE_PURE_DECIMAL ? CORETYPE_UNKNOWN : type_hint; } // diagnostics (error reporting) const char *get_token_description () const { return Rust::get_token_description (token_id); } // debugging const char *token_id_to_str () const { return Rust::token_id_to_str (token_id); } // debugging const char *get_type_hint_str () const; /* Returns whether the token is a literal of any type (int, float, char, * string, byte char, byte string). */ bool is_literal () const { switch (token_id) { case INT_LITERAL: case FLOAT_LITERAL: case CHAR_LITERAL: case STRING_LITERAL: case BYTE_CHAR_LITERAL: case BYTE_STRING_LITERAL: return true; default: return false; } } /* Returns whether the token actually has a string (regardless of whether it * should or not). */ bool has_str () const { return str != nullptr; } // Returns whether the token should have a string. bool should_have_str () const { return is_literal () || token_id == IDENTIFIER || token_id == LIFETIME; } // Returns whether the token is a pure decimal int literal bool is_pure_decimal () const { return type_hint == CORETYPE_PURE_DECIMAL; } // Return the token representation as someone would find it in the original // source code file. std::string as_string () const; }; } // namespace Rust namespace std { template <> struct hash { size_t operator() (const Rust::PrimitiveCoreType &coretype) const noexcept { return hash::type> () ( static_cast::type> ( coretype)); } }; } // namespace std #endif