| // lex.h -- Go frontend lexer. -*- C++ -*- |
| |
| // Copyright 2009 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| #ifndef GO_LEX_H |
| #define GO_LEX_H |
| |
| #include <mpfr.h> |
| |
| #include "operator.h" |
| #include "go-linemap.h" |
| |
| // GO_ATTRIBUTE_UNUSED may follow a declaration that is allowed to go |
| // unused.  It expands to the GNU unused attribute when the compiler |
| // reports itself as GCC 3.4 or later, and to nothing otherwise. |
| #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) |
| #define GO_ATTRIBUTE_UNUSED __attribute__((__unused__)) |
| #else |
| #define GO_ATTRIBUTE_UNUSED |
| #endif |
| |
| struct Unicode_range; |
| |
| // The keywords the lexer can report.  Apart from KEYWORD_INVALID, which comes |
| // first, the enumerators must be kept in sorted order, and they must match |
| // the Keywords::mapping_ array in lex.cc. |
| |
| enum Keyword |
| { |
| KEYWORD_INVALID, // Not a keyword; as the first enumerator its value is zero. |
| KEYWORD_ASM, |
| KEYWORD_BREAK, |
| KEYWORD_CASE, |
| KEYWORD_CHAN, |
| KEYWORD_CONST, |
| KEYWORD_CONTINUE, |
| KEYWORD_DEFAULT, |
| KEYWORD_DEFER, |
| KEYWORD_ELSE, |
| KEYWORD_FALLTHROUGH, |
| KEYWORD_FOR, |
| KEYWORD_FUNC, |
| KEYWORD_GO, |
| KEYWORD_GOTO, |
| KEYWORD_IF, |
| KEYWORD_IMPORT, |
| KEYWORD_INTERFACE, |
| KEYWORD_MAP, |
| KEYWORD_PACKAGE, |
| KEYWORD_RANGE, |
| KEYWORD_RETURN, |
| KEYWORD_SELECT, |
| KEYWORD_STRUCT, |
| KEYWORD_SWITCH, |
| KEYWORD_TYPE, |
| KEYWORD_VAR |
| }; |
| |
| // Pragmas built from magic comments and recorded for functions. |
| // Each enumerator is a distinct single bit, so values can be combined in a bitmask. |
| // The set of values is intended to be the same as in the gc compiler. |
| |
| enum GoPragma |
| { |
| GOPRAGMA_NOINTERFACE = 1 << 0, // Method not in type descriptor. |
| GOPRAGMA_NOESCAPE = 1 << 1, // Args do not escape. |
| GOPRAGMA_NORACE = 1 << 2, // No race detector. |
| GOPRAGMA_NOSPLIT = 1 << 3, // Do not split stack. |
| GOPRAGMA_NOINLINE = 1 << 4, // Do not inline. |
| GOPRAGMA_SYSTEMSTACK = 1 << 5, // Must run on system stack. |
| GOPRAGMA_NOWRITEBARRIER = 1 << 6, // No write barriers. |
| GOPRAGMA_NOWRITEBARRIERREC = 1 << 7, // No write barriers here or in callees. |
| GOPRAGMA_YESWRITEBARRIERREC = 1 << 8, // Stops nowritebarrierrec. |
| GOPRAGMA_MARK = 1 << 9, // Marker for nowritebarrierrec. |
| GOPRAGMA_CGOUNSAFEARGS = 1 << 10, // Pointer to arg is pointer to all. |
| GOPRAGMA_UINTPTRESCAPES = 1 << 11, // uintptr(p) escapes. |
| GOPRAGMA_NOTINHEAP = 1 << 12 // Type is not in heap. |
| }; |
| |
| // A token returned from the lexer.  Every token has a classification |
| // and a source location; most classifications also carry a value, |
| // which is stored in a union selected by the classification. |
| |
| class Token |
| { |
| public: |
| // The kinds of token. |
| enum Classification |
| { |
| TOKEN_INVALID, // An invalid token. |
| TOKEN_EOF, // End of input. |
| TOKEN_KEYWORD, // A keyword. |
| TOKEN_IDENTIFIER, // An identifier. |
| TOKEN_STRING, // A string of characters. |
| TOKEN_OPERATOR, // An operator. |
| TOKEN_CHARACTER, // A character constant. |
| TOKEN_INTEGER, // An integer. |
| TOKEN_FLOAT, // A floating point number. |
| TOKEN_IMAGINARY // An imaginary number. |
| }; |
| |
| // The destructor, copy constructor and copy assignment operator are |
| // only declared here; their definitions are not in this header. |
| ~Token(); |
| Token(const Token&); |
| Token& operator=(const Token&); |
| |
| // Return the classification of this token. |
| Classification |
| classification() const |
| { |
| return classification_; |
| } |
| |
| // Build a token for an invalid value at LOCATION. |
| static Token |
| make_invalid_token(Location location) |
| { |
| return Token(TOKEN_INVALID, location); |
| } |
| |
| // Build the token that represents end of file at LOCATION. |
| static Token |
| make_eof_token(Location location) |
| { |
| return Token(TOKEN_EOF, location); |
| } |
| |
| // Build a token for the keyword KEYWORD at LOCATION. |
| static Token |
| make_keyword_token(Keyword keyword, Location location) |
| { |
| Token result(TOKEN_KEYWORD, location); |
| result.u_.keyword = keyword; |
| return result; |
| } |
| |
| // Build an identifier token at LOCATION.  The token holds its own |
| // newly allocated copy of VALUE, along with IS_EXPORTED. |
| static Token |
| make_identifier_token(const std::string& value, bool is_exported, |
| Location location) |
| { |
| Token result(TOKEN_IDENTIFIER, location); |
| result.u_.identifier_value.name = new std::string(value); |
| result.u_.identifier_value.is_exported = is_exported; |
| return result; |
| } |
| |
| // Build a quoted string token at LOCATION.  The token holds its own |
| // newly allocated copy of VALUE. |
| static Token |
| make_string_token(const std::string& value, Location location) |
| { |
| Token result(TOKEN_STRING, location); |
| result.u_.string_value = new std::string(value); |
| return result; |
| } |
| |
| // Build a token for the operator OP at LOCATION. |
| static Token |
| make_operator_token(Operator op, Location location) |
| { |
| Token result(TOKEN_OPERATOR, location); |
| result.u_.op = op; |
| return result; |
| } |
| |
| // Build a character constant token at LOCATION.  The contents of |
| // VAL are swapped into the token, so on return VAL holds the |
| // freshly initialized value that the token started out with. |
| static Token |
| make_character_token(mpz_t val, Location location) |
| { |
| Token result(TOKEN_CHARACTER, location); |
| mpz_init(result.u_.integer_value); |
| mpz_swap(result.u_.integer_value, val); |
| return result; |
| } |
| |
| // Build an integer token at LOCATION.  As for a character token, |
| // the contents of VAL are swapped into the token. |
| static Token |
| make_integer_token(mpz_t val, Location location) |
| { |
| Token result(TOKEN_INTEGER, location); |
| mpz_init(result.u_.integer_value); |
| mpz_swap(result.u_.integer_value, val); |
| return result; |
| } |
| |
| // Build a floating point token at LOCATION.  The contents of VAL |
| // are swapped into the token, so on return VAL holds the freshly |
| // initialized value that the token started out with. |
| static Token |
| make_float_token(mpfr_t val, Location location) |
| { |
| Token result(TOKEN_FLOAT, location); |
| mpfr_init(result.u_.float_value); |
| mpfr_swap(result.u_.float_value, val); |
| return result; |
| } |
| |
| // Build an imaginary number token at LOCATION.  As for a float |
| // token, the contents of VAL are swapped into the token. |
| static Token |
| make_imaginary_token(mpfr_t val, Location location) |
| { |
| Token result(TOKEN_IMAGINARY, location); |
| mpfr_init(result.u_.float_value); |
| mpfr_swap(result.u_.float_value, val); |
| return result; |
| } |
| |
| // Return the source location of this token. |
| Location |
| location() const |
| { |
| return location_; |
| } |
| |
| // Return whether this token is invalid. |
| bool |
| is_invalid() const |
| { |
| return classification_ == TOKEN_INVALID; |
| } |
| |
| // Return whether this token is the EOF token. |
| bool |
| is_eof() const |
| { |
| return classification_ == TOKEN_EOF; |
| } |
| |
| // Return the keyword of a keyword token.  Asserts that this is a |
| // keyword token. |
| Keyword |
| keyword() const |
| { |
| go_assert(classification_ == TOKEN_KEYWORD); |
| return u_.keyword; |
| } |
| |
| // Return whether this token is an identifier. |
| bool |
| is_identifier() const |
| { |
| return classification_ == TOKEN_IDENTIFIER; |
| } |
| |
| // Return the name of an identifier token, as a reference to the |
| // string held by the token.  Asserts that this is an identifier. |
| const std::string& |
| identifier() const |
| { |
| go_assert(classification_ == TOKEN_IDENTIFIER); |
| return *u_.identifier_value.name; |
| } |
| |
| // Return whether an identifier token is exported.  Asserts that |
| // this is an identifier. |
| bool |
| is_identifier_exported() const |
| { |
| go_assert(classification_ == TOKEN_IDENTIFIER); |
| return u_.identifier_value.is_exported; |
| } |
| |
| // Return whether this token is a string. |
| bool |
| is_string() const |
| { return classification_ == TOKEN_STRING; } |
| |
| // Return a copy of the value of a string token.  The value is a |
| // string of UTF-8 characters.  Asserts that this is a string. |
| std::string |
| string_value() const |
| { |
| go_assert(classification_ == TOKEN_STRING); |
| return *u_.string_value; |
| } |
| |
| // Return a pointer to the value of a character constant token. |
| // Asserts that this is a character constant. |
| const mpz_t* |
| character_value() const |
| { |
| go_assert(classification_ == TOKEN_CHARACTER); |
| return &u_.integer_value; |
| } |
| |
| // Return a pointer to the value of an integer token.  Asserts that |
| // this is an integer. |
| const mpz_t* |
| integer_value() const |
| { |
| go_assert(classification_ == TOKEN_INTEGER); |
| return &u_.integer_value; |
| } |
| |
| // Return a pointer to the value of a float token.  Asserts that |
| // this is a float. |
| const mpfr_t* |
| float_value() const |
| { |
| go_assert(classification_ == TOKEN_FLOAT); |
| return &u_.float_value; |
| } |
| |
| // Return a pointer to the value of an imaginary number token. |
| // Asserts that this is an imaginary number. |
| const mpfr_t* |
| imaginary_value() const |
| { |
| go_assert(classification_ == TOKEN_IMAGINARY); |
| return &u_.float_value; |
| } |
| |
| // Return the operator of an operator token.  Asserts that this is |
| // an operator. |
| Operator |
| op() const |
| { |
| go_assert(classification_ == TOKEN_OPERATOR); |
| return u_.op; |
| } |
| |
| // Return whether this token is the keyword KEYWORD. |
| bool |
| is_keyword(Keyword keyword) const |
| { |
| if (classification_ != TOKEN_KEYWORD) |
| return false; |
| return u_.keyword == keyword; |
| } |
| |
| // Return whether this token is the operator OP. |
| bool |
| is_op(Operator op) const |
| { |
| if (classification_ != TOKEN_OPERATOR) |
| return false; |
| return u_.op == op; |
| } |
| |
| // Print the token for debugging. |
| void |
| print(FILE*) const; |
| |
| private: |
| // The constructor is private; tokens are built through the |
| // make_..._token functions above. |
| Token(Classification, Location); |
| |
| // Clear the token. |
| void |
| clear(); |
| |
| // Which kind of token this is.  This selects the valid member of |
| // the union below. |
| Classification classification_; |
| union |
| { |
| // TOKEN_KEYWORD: the keyword. |
| Keyword keyword; |
| // TOKEN_IDENTIFIER: the identifier. |
| struct |
| { |
| // The identifier's name, mangled so that it includes only |
| // ASCII characters. |
| std::string* name; |
| // Whether the name should be exported, which is the case if |
| // its first letter is upper case. |
| bool is_exported; |
| } identifier_value; |
| // TOKEN_STRING: the string. |
| std::string* string_value; |
| // TOKEN_CHARACTER or TOKEN_INTEGER: the value. |
| mpz_t integer_value; |
| // TOKEN_FLOAT or TOKEN_IMAGINARY: the value. |
| mpfr_t float_value; |
| // TOKEN_OPERATOR: the operator. |
| Operator op; |
| } u_; |
| // Where the token appears in the source. |
| Location location_; |
| }; |
| |
| // The lexer itself.  A Lex is constructed over an input file and |
| // returns tokens through next_token. |
| |
| class Lex |
| { |
| public: |
| // INPUT_FILE_NAME is the name of the input file, INPUT_FILE is the |
| // input file, and LINEMAP is the object used to keep track of file |
| // names and line numbers. |
| Lex(const char* input_file_name, FILE* input_file, Linemap *linemap); |
| |
| ~Lex(); |
| |
| // Return the next token. |
| Token |
| next_token(); |
| |
| // Return the contents of any current //extern comment.  The result |
| // is a reference to the lexer's own string, not a copy. |
| const std::string& |
| extern_name() const |
| { return this->extern_; } |
| |
| // Return the current set of pragmas, and clear them: the saved |
| // bitmask is returned and then reset to zero. |
| unsigned int |
| get_and_clear_pragmas() |
| { |
| unsigned int ret = this->pragmas_; |
| this->pragmas_ = 0; |
| return ret; |
| } |
| |
| // The information recorded for one go:linkname directive. |
| struct Linkname |
| { |
| std::string ext_name; // External name; empty to just export. |
| bool is_exported; // Whether the internal name is exported. |
| Location loc; // Location of go:linkname directive. |
| |
| // Make an entry with an empty external name that is not exported. |
| Linkname() |
| : ext_name(), is_exported(false), loc() |
| { } |
| |
| // Make an entry from the given field values. |
| Linkname(const std::string& ext_name_a, bool is_exported_a, Location loc_a) |
| : ext_name(ext_name_a), is_exported(is_exported_a), loc(loc_a) |
| { } |
| }; |
| |
| // A map from a name to its Linkname entry. |
| // NOTE(review): the key is presumably the Go name given in the |
| // go:linkname directive -- confirm against the definition. |
| typedef std::map<std::string, Linkname> Linknames; |
| |
| // Return the linknames seen so far, or NULL if none, and clear the |
| // set.  These are from go:linkname compiler directives.  The lexer |
| // drops its pointer to the returned map. |
| // NOTE(review): the caller presumably becomes responsible for |
| // freeing the map -- confirm. |
| Linknames* |
| get_and_clear_linknames() |
| { |
| Linknames* ret = this->linknames_; |
| this->linknames_ = NULL; |
| return ret; |
| } |
| |
| // Return whether there are any current go:embed patterns. |
| bool |
| has_embeds() const |
| { return !this->embeds_.empty(); } |
| |
| // If there are any go:embed patterns seen so far, store them in |
| // *EMBEDS and clear the saved set.  *EMBEDS must be an empty |
| // vector; this is asserted.  The two vectors are swapped, so no |
| // pattern strings are copied. |
| void |
| get_and_clear_embeds(std::vector<std::string>* embeds) |
| { |
| go_assert(embeds->empty()); |
| std::swap(*embeds, this->embeds_); |
| } |
| |
| // Clear any go:embed patterns seen so far.  This is used for |
| // erroneous cases. |
| void |
| clear_embeds() |
| { this->embeds_.clear(); } |
| |
| // Return whether the identifier NAME should be exported.  NAME is a |
| // mangled name which includes only ASCII characters. |
| static bool |
| is_exported_mangled_name(const std::string& name); |
| |
| // Return whether the identifier NAME should be exported.  NAME is |
| // an unmangled utf-8 string and may contain non-ASCII characters. |
| static bool |
| is_exported_name(const std::string& name); |
| |
| // Return whether the identifier NAME is invalid.  When we see an |
| // invalid character we still build an identifier, but we use a |
| // magic string to indicate that the identifier is invalid.  We then |
| // use this to avoid knock-on errors. |
| static bool |
| is_invalid_identifier(const std::string& name); |
| |
| // A helper function.  Append V to STR.  IS_CHARACTER is true if V |
| // is a Unicode character which should be converted into UTF-8, |
| // false if it is a byte value to be appended directly.  The |
| // location is used to warn about an out of range character. |
| static void |
| append_char(unsigned int v, bool is_character, std::string* str, |
| Location); |
| |
| // A helper function.  Fetch a UTF-8 character from STR and store it |
| // in *VALUE.  Return the number of bytes read from STR.  Return 0 |
| // if STR does not point to a valid UTF-8 character. |
| static int |
| fetch_char(const char* str, unsigned int *value); |
| |
| // Return whether C is a Unicode or "C" locale space character. |
| static bool |
| is_unicode_space(unsigned int c); |
| |
| // Convert the specified hex char into an unsigned integer value. |
| static unsigned |
| hex_val(char c); |
| |
| private: |
| // Most of the private helpers below are only declared here; their |
| // definitions are not in this header. |
| |
| ssize_t |
| get_line(); |
| |
| bool |
| require_line(); |
| |
| // The current location. |
| Location |
| location() const; |
| |
| // A position CHARS column positions before the current location. |
| Location |
| earlier_location(int chars) const; |
| |
| static bool |
| is_hex_digit(char); |
| |
| static bool |
| is_base_digit(int base, char); |
| |
| // Return C - '0', the value of the octal digit C.  No check is made |
| // that C is actually an octal digit. |
| static unsigned char |
| octal_value(char c) |
| { return c - '0'; } |
| |
| // Make an invalid token at the current location. |
| Token |
| make_invalid_token() |
| { return Token::make_invalid_token(this->location()); } |
| |
| // Make an EOF token at the current location. |
| Token |
| make_eof_token() |
| { return Token::make_eof_token(this->location()); } |
| |
| // Make a token for the operator OP, located CHARS column positions |
| // before the current location. |
| Token |
| make_operator(Operator op, int chars) |
| { return Token::make_operator_token(op, this->earlier_location(chars)); } |
| |
| Token |
| gather_identifier(); |
| |
| static bool |
| could_be_exponent(int base, const char*, const char*); |
| |
| Token |
| gather_number(); |
| |
| void |
| skip_exponent(); |
| |
| Token |
| gather_character(); |
| |
| Token |
| gather_string(); |
| |
| Token |
| gather_raw_string(); |
| |
| const char* |
| advance_one_utf8_char(const char*, unsigned int*, bool*); |
| |
| const char* |
| advance_one_char(const char*, bool, unsigned int*, bool*); |
| |
| static bool |
| is_unicode_digit(unsigned int c); |
| |
| static bool |
| is_unicode_letter(unsigned int c); |
| |
| static bool |
| is_unicode_uppercase(unsigned int c); |
| |
| static bool |
| is_in_unicode_range(unsigned int C, const Unicode_range* ranges, |
| size_t range_size); |
| |
| Operator |
| three_character_operator(char, char, char); |
| |
| Operator |
| two_character_operator(char, char); |
| |
| Operator |
| one_character_operator(char); |
| |
| bool |
| skip_c_comment(bool* found_newline); |
| |
| void |
| skip_cpp_comment(); |
| |
| void |
| gather_embed(const char*, const char*); |
| |
| // The input file name.  This member is marked GO_ATTRIBUTE_UNUSED. |
| // NOTE(review): presumably to silence an unused-field warning -- |
| // confirm. |
| const char* input_file_name_ GO_ATTRIBUTE_UNUSED; |
| // The input file. |
| FILE* input_file_; |
| // The object used to keep track of file names and line numbers. |
| Linemap* linemap_; |
| // The line buffer.  This holds the current line. |
| char* linebuf_; |
| // The size of the line buffer. |
| size_t linebufsize_; |
| // The number of characters in the current line. |
| size_t linesize_; |
| // The current offset in linebuf_. |
| size_t lineoff_; |
| // The current line number. |
| size_t lineno_; |
| // Whether to add a semicolon if we see a newline now. |
| bool add_semi_at_eol_; |
| // Pragmas for the next function, from magic comments. |
| unsigned int pragmas_; |
| // The external name to use for a function declaration, from a magic |
| // //extern comment. |
| std::string extern_; |
| // The list of //go:linkname comments, if any. |
| Linknames* linknames_; |
| // The list of //go:embed patterns, if any. |
| std::vector<std::string> embeds_; |
| }; |
| |
| #endif // !defined(GO_LEX_H) |