From 94f0866683bdf063b30323bac227e11a23110fa2 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Sat, 22 Aug 2009 10:20:34 +0200 Subject: Add call and template expressions Also add support for querying the lexer failure state. --- cli/lexer.cxx | 113 ++++++++++++++++++++++++++++++++++++++++------- cli/lexer.hxx | 12 ++++- cli/lexer.ixx | 6 +++ cli/token.hxx | 29 +++++++++--- cli/token.ixx | 10 ++++- tests/lexer/driver.cxx | 12 ++++- tests/lexer/makefile | 2 +- tests/lexer/test-005.cli | 2 + tests/lexer/test-005.std | 3 ++ 9 files changed, 162 insertions(+), 27 deletions(-) create mode 100644 tests/lexer/test-005.cli create mode 100644 tests/lexer/test-005.std diff --git a/cli/lexer.cxx b/cli/lexer.cxx index 203667a..6cf012d 100644 --- a/cli/lexer.cxx +++ b/cli/lexer.cxx @@ -17,7 +17,8 @@ Lexer (istream& is, string const& id) l_ (1), c_(1), eos_ (false), - include_ (false) + include_ (false), + valid_ (true) { keyword_map_["include"] = Token::k_include; keyword_map_["namespace"] = Token::k_namespace; @@ -111,8 +112,8 @@ next () { if (include) return path_literal (c); - - break; + else + return template_expression (c); } case ';': { @@ -124,7 +125,12 @@ next () } case ':': { - return Token (Token::p_colon, c.line (), c.column ()); + if (peek () == ':') + { + get (); + return Token (Token::p_dcolon, c.line (), c.column ()); + } + break; } case '{': { @@ -136,11 +142,7 @@ next () } case '(': { - return Token (Token::p_lparen, c.line (), c.column ()); - } - case ')': - { - return Token (Token::p_rparen, c.line (), c.column ()); + return call_expression (c); } case '=': { @@ -171,7 +173,7 @@ next () // cerr << id_ << ':' << c.line () << ':' << c.column () << ": error: unexpected character '-'" << endl; - throw invalid_input (); + throw InvalidInput (); } break; @@ -190,9 +192,11 @@ next () cerr << id_ << ':' << c.line () << ':' << c.column () << ": error: unexpected character '" << c << "'" << endl; + throw InvalidInput (); } - catch (invalid_input const&) + catch 
(InvalidInput const&) { + valid_ = false; } // Try to recover. @@ -245,7 +249,7 @@ identifier (Char c) { cerr << id_ << ':' << c.line () << ':' << c.column () << ": error: " << "invalid character sequence '" << lexeme << "'" << endl; - throw invalid_input (); + throw InvalidInput (); } } @@ -302,7 +306,7 @@ char_literal (Char c) { cerr << id_ << ':' << c.line () << ':' << c.column () << ": error: " << "end of stream reached while reading character literal" << endl; - throw invalid_input (); + throw InvalidInput (); } lexeme += c; @@ -363,7 +367,7 @@ string_literal_trailer () { cerr << id_ << ':' << c.line () << ':' << c.column () << ": error: " << "end of stream reached while reading string literal" << endl; - throw invalid_input (); + throw InvalidInput (); } r += c; @@ -400,7 +404,7 @@ path_literal (Char c) { cerr << id_ << ':' << c.line () << ':' << c.column () << ": error: " << "end of stream reached while reading path literal" << endl; - throw invalid_input (); + throw InvalidInput (); } lexeme += c; @@ -411,3 +415,82 @@ path_literal (Char c) return Token (Token::t_path_lit, lexeme, ln, cl); } + +Token Lexer:: +call_expression (Char c) +{ + size_t ln (c.line ()), cl (c.column ()); + string lexeme; + lexeme += c; + size_t balance (1); + + while (balance != 0) + { + c = get (); + + if (is_eos (c)) + { + cerr << id_ << ':' << c.line () << ':' << c.column () << ": error: " + << "end of stream reached while reading call expression" << endl; + throw InvalidInput (); + } + + lexeme += c; + + switch (c) + { + case '(': + { + balance++; + break; + } + case ')': + { + balance--; + break; + } + } + } + + return Token (Token::t_call_expr, lexeme, ln, cl); +} + +Token Lexer:: +template_expression (Char c) +{ + size_t ln (c.line ()), cl (c.column ()); + string lexeme; + lexeme += c; + size_t balance (1); + + while (balance != 0) + { + c = get (); + + if (is_eos (c)) + { + cerr << id_ << ':' << c.line () << ':' << c.column () << ": error: " + << "end of stream reached while 
reading template expression" + << endl; + throw InvalidInput (); + } + + lexeme += c; + + switch (c) + { + case '<': + { + balance++; + break; + } + case '>': + { + balance--; + break; + } + } + } + + return Token (Token::t_template_expr, lexeme, ln, cl); +} diff --git a/cli/lexer.hxx b/cli/lexer.hxx index 1caceb3..50990c3 100644 --- a/cli/lexer.hxx +++ b/cli/lexer.hxx @@ -22,6 +22,9 @@ public: Token next (); + bool + valid () const; + protected: class Char { @@ -56,7 +59,7 @@ protected: peek (); protected: - class invalid_input {}; + class InvalidInput {}; void skip_spaces (); @@ -82,6 +85,12 @@ protected: Token path_literal (Char); + Token + call_expression (Char); + + Token + template_expression (Char); + protected: bool is_alpha (char c) const; @@ -120,6 +129,7 @@ private: bool eos_; bool include_; + bool valid_; }; #include "lexer.ixx" diff --git a/cli/lexer.ixx b/cli/lexer.ixx index f7ff77e..7e84cfc 100644 --- a/cli/lexer.ixx +++ b/cli/lexer.ixx @@ -38,6 +38,12 @@ column () const // Lexer // inline bool Lexer:: +valid () const +{ + return valid_; +} + +inline bool Lexer:: is_alpha (char c) const { return std::isalpha (c, loc_); diff --git a/cli/token.hxx b/cli/token.hxx index c12d097..08918f8 100644 --- a/cli/token.hxx +++ b/cli/token.hxx @@ -23,7 +23,9 @@ public: t_char_lit, t_bool_lit, t_int_lit, - t_float_lit + t_float_lit, + t_call_expr, // The so called "call expression", e.g., (2, a). + t_template_expr // The so called "template expression", e.g., <foo, 3>. }; Type @@ -52,9 +54,12 @@ public: k_int, k_long, k_float, - k_double + k_double, + k_invalid }; + // Return the keyword id if type is t_keyword and k_invalid otherwise. + // Keyword keyword () const; @@ -71,15 +76,19 @@ public: { p_semi, p_comma, - p_colon, + p_dcolon, p_lcbrace, p_rcbrace, - p_lparen, - p_rparen, + // p_lparen, + // p_rparen, p_eq, - p_or + p_or, + p_invalid }; + // Return the punctuation id if type is t_punctuation and p_invalid + // otherwise. 
+ // Punctuation punctuation () const; @@ -89,6 +98,12 @@ public: std::string const& literal () const; + // Expressions. + // +public: + std::string const& + expression () const; + // C-tors. // public: @@ -99,7 +114,7 @@ public: Token (Keyword k, std::size_t l, std::size_t c); Token (Punctuation p, std::size_t l, std::size_t c); - // Identifier & literals. + // Identifier, literals, and expressions. // Token (Type t, std::string const& s, std::size_t l, std::size_t c); diff --git a/cli/token.ixx b/cli/token.ixx index bcf040a..93db6d8 100644 --- a/cli/token.ixx +++ b/cli/token.ixx @@ -24,7 +24,7 @@ column () const inline Token::Keyword Token:: keyword () const { - return keyword_; + return type_ == t_keyword ? keyword_ : k_invalid; } inline std::string const& Token:: @@ -36,7 +36,7 @@ identifier () const inline Token::Punctuation Token:: punctuation () const { - return punctuation_; + return type_ == t_punctuation ? punctuation_ : p_invalid; } inline std::string const& Token:: @@ -45,6 +45,12 @@ literal () const return str_; } +inline std::string const& Token:: +expression () const +{ + return str_; +} + inline Token:: Token (std::size_t l, std::size_t c) : l_ (l), c_ (c), type_ (t_eos) diff --git a/tests/lexer/driver.cxx b/tests/lexer/driver.cxx index 62c236f..4b431eb 100644 --- a/tests/lexer/driver.cxx +++ b/tests/lexer/driver.cxx @@ -28,7 +28,7 @@ const char* keywords[] = "double" }; -const char punctuation[] = {';', ',', ':', '{', '}', '(', ')', '=', '|'}; +const char* punctuation[] = {";", ",", "::", "{", "}", /*"(", ")",*/ "=", "|"}; int main (int argc, char* argv[]) { @@ -100,6 +100,16 @@ int main (int argc, char* argv[]) cout << t.literal () << endl; break; } + case Token::t_call_expr: + { + cout << t.expression () << endl; + break; + } + case Token::t_template_expr: + { + cout << t.expression () << endl; + break; + } } } } diff --git a/tests/lexer/makefile b/tests/lexer/makefile index 3ecbae5..0764869 100644 --- a/tests/lexer/makefile +++ 
b/tests/lexer/makefile @@ -7,7 +7,7 @@ include $(dir $(lastword $(MAKEFILE_LIST)))../../build/bootstrap.make cxx_tun := driver.cxx -tests := 000 001 002 003 004 +tests := 000 001 002 003 004 005 # # diff --git a/tests/lexer/test-005.cli b/tests/lexer/test-005.cli new file mode 100644 index 0000000..6085a33 --- /dev/null +++ b/tests/lexer/test-005.cli @@ -0,0 +1,2 @@ +(abc, 123 - 345, 12.34) + diff --git a/tests/lexer/test-005.std b/tests/lexer/test-005.std new file mode 100644 index 0000000..20e941b --- /dev/null +++ b/tests/lexer/test-005.std @@ -0,0 +1,3 @@ +(abc, 123 - 345, 12.34) + + -- cgit v1.1