// Copyright (C) 2020-2024 Free Software Foundation, Inc. // This file is part of GCC. // GCC is free software; you can redistribute it and/or modify it under // the terms of the GNU General Public License as published by the Free // Software Foundation; either version 3, or (at your option) any later // version. // GCC is distributed in the hope that it will be useful, but WITHOUT ANY // WARRANTY; without even the implied warranty of MERCHANTABILITY or // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License // for more details. // You should have received a copy of the GNU General Public License // along with GCC; see the file COPYING3. If not see // . #ifndef RUST_LEX_H #define RUST_LEX_H #include "rust-linemap.h" #include "rust-buffered-queue.h" #include "rust-token.h" #include "optional.h" #include "selftest.h" #include "rust-input-source.h" namespace Rust { // Simple wrapper for FILE* that simplifies destruction. struct RAIIFile { private: FILE *file; const char *filename; void close () { if (file != nullptr && file != stdin) fclose (file); } static bool allowed_filetype (const struct stat &statbuf) { // The file could be either // - a regular file // - a char device (/dev/null...) return S_ISREG (statbuf.st_mode) || S_ISCHR (statbuf.st_mode); } public: RAIIFile (const char *filename) : filename (filename) { if (strcmp (filename, "-") == 0) { file = stdin; } else { struct stat statbuf; if (!(file = fopen (filename, "r"))) { return; } if (-1 == fstat (fileno (file), &statbuf) || !allowed_filetype (statbuf)) { fclose (file); file = nullptr; errno = EISDIR; } } } /** * Create a RAIIFile from an existing instance of FILE* */ RAIIFile (FILE *raw, const char *filename = nullptr) : file (raw), filename (filename) {} RAIIFile (const RAIIFile &other) = delete; RAIIFile &operator= (const RAIIFile &other) = delete; // have to specify setting file to nullptr, otherwise unintended fclose occurs RAIIFile (RAIIFile &&other) : file (other.file), filename (other.filename) { other.file = nullptr; } RAIIFile &operator= (RAIIFile &&other) { close (); file = other.file; filename = other.filename; other.file = nullptr; return *this; } static RAIIFile create_error () { return RAIIFile (nullptr, nullptr); } ~RAIIFile () { close (); } FILE *get_raw () { return file; } const char *get_filename () { return filename; } bool ok () const { return file; } }; class Lexer { private: // Request new Location for current column in line_table location_t get_current_location (); // Skips the current input char. void skip_input (); // Advances current input char to n + 1 chars ahead of current position. void skip_input (int n); // Peeks the current char. Codepoint peek_input (); // Returns char n bytes ahead of current position. Codepoint peek_input (int n); // Classifies keyword (i.e. gets id for keyword). TokenId classify_keyword (const std::string &str); std::tuple parse_in_decimal (); std::pair parse_in_exponent_part (); std::pair parse_in_type_suffix (); std::tuple parse_escape (char opening_char); std::tuple parse_utf8_escape (); int parse_partial_string_continue (); std::pair parse_partial_hex_escape (); std::pair parse_partial_unicode_escape (); void skip_broken_string_input (Codepoint current_char); TokenPtr parse_byte_char (location_t loc); TokenPtr parse_byte_string (location_t loc); TokenPtr parse_raw_byte_string (location_t loc); TokenPtr parse_raw_identifier (location_t loc); TokenPtr parse_string (location_t loc); TokenPtr maybe_parse_raw_string (location_t loc); TokenPtr parse_raw_string (location_t loc, int initial_hash_count); TokenPtr parse_non_decimal_int_literals (location_t loc); TokenPtr parse_decimal_int_or_float (location_t loc); TokenPtr parse_char_or_lifetime (location_t loc); TokenPtr parse_identifier_or_keyword (location_t loc); template TokenPtr parse_non_decimal_int_literal (location_t loc, IsDigitFunc is_digit_func, std::string existent_str, int base); public: // Construct lexer with input file and filename provided Lexer (const char *filename, RAIIFile input, Linemap *linemap, tl::optional dump_lex_opt = tl::nullopt); // Lex the contents of a string instead of a file Lexer (const std::string &input, Linemap *linemap); // dtor ~Lexer (); // don't allow copy semantics (for now, at least) Lexer (const Lexer &other) = delete; Lexer &operator= (const Lexer &other) = delete; // enable move semantics Lexer (Lexer &&other) = default; Lexer &operator= (Lexer &&other) = default; bool input_source_is_valid_utf8 (); // Returns token n tokens ahead of current position. const_TokenPtr peek_token (int n) { return token_queue.peek (n); } // Peeks the current token. const_TokenPtr peek_token () { return peek_token (0); } // Builds a token from the input queue. TokenPtr build_token (); // Advances current token to n + 1 tokens ahead of current position. void skip_token (int n); // Skips the current token. void skip_token () { skip_token (0); } // Dumps and advances by n + 1 tokens. void dump_and_skip (int n); // Replaces the current token with a specified token. void replace_current_token (TokenPtr replacement); // FIXME: don't use anymore /* Splits the current token into two. Intended for use with nested generics * closes (i.e. T> where >> is wrongly lexed as one token). Note that * this will only work with "simple" tokens like punctuation. */ void split_current_token (TokenId new_left, TokenId new_right); void split_current_token (std::vector new_tokens); Linemap *get_line_map () { return line_map; } std::string get_filename () { return std::string (input.get_filename ()); } private: void start_line (int current_line, int current_column); // File for use as input. RAIIFile input; // TODO is this actually required? could just have file storage in InputSource // Current line number. int current_line; // Current column number. int current_column; // Current character. Codepoint current_char; // Line map. Linemap *line_map; /* Max column number that can be quickly allocated - higher may require * allocating new linemap */ static const int max_column_hint = 80; tl::optional dump_lex_out; // The input source for the lexer. // InputSource input_source; // Input file queue. std::unique_ptr raw_input_source; buffered_queue> input_queue; // Token source wrapper thing. struct TokenSource { // The lexer object that will use this TokenSource. Lexer *lexer; // Create a new TokenSource with given lexer. TokenSource (Lexer *parLexer) : lexer (parLexer) {} // Used to mimic std::reference_wrapper that is used for InputSource. TokenSource &get () { return *this; } // Overload operator () to build token in lexer. TokenPtr next () { return lexer->build_token (); } }; // The token source for the lexer. // TokenSource token_source; // Token stream queue. buffered_queue, TokenSource> token_queue; }; } // namespace Rust #if CHECKING_P namespace selftest { void rust_input_source_test (); } // namespace selftest #endif // CHECKING_P #endif