summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- cli/lexer.cxx 113
-rw-r--r-- cli/lexer.hxx 12
-rw-r--r-- cli/lexer.ixx 6
-rw-r--r-- cli/token.hxx 29
-rw-r--r-- cli/token.ixx 10
-rw-r--r-- tests/lexer/driver.cxx 12
-rw-r--r-- tests/lexer/makefile 2
-rw-r--r-- tests/lexer/test-005.cli 2
-rw-r--r-- tests/lexer/test-005.std 3
9 files changed, 162 insertions, 27 deletions
diff --git a/cli/lexer.cxx b/cli/lexer.cxx
index 203667a..6cf012d 100644
--- a/cli/lexer.cxx
+++ b/cli/lexer.cxx
@@ -17,7 +17,8 @@ Lexer (istream& is, string const& id)
l_ (1),
c_(1),
eos_ (false),
- include_ (false)
+ include_ (false),
+ valid_ (true)
{
keyword_map_["include"] = Token::k_include;
keyword_map_["namespace"] = Token::k_namespace;
@@ -111,8 +112,8 @@ next ()
{
if (include)
return path_literal (c);
-
- break;
+ else
+ return template_expression (c);
}
case ';':
{
@@ -124,7 +125,12 @@ next ()
}
case ':':
{
- return Token (Token::p_colon, c.line (), c.column ());
+ if (peek () == ':')
+ {
+ get ();
+ return Token (Token::p_dcolon, c.line (), c.column ());
+ }
+ break;
}
case '{':
{
@@ -136,11 +142,7 @@ next ()
}
case '(':
{
- return Token (Token::p_lparen, c.line (), c.column ());
- }
- case ')':
- {
- return Token (Token::p_rparen, c.line (), c.column ());
+ return call_expression (c);
}
case '=':
{
@@ -171,7 +173,7 @@ next ()
//
cerr << id_ << ':' << c.line () << ':' << c.column ()
<< ": error: unexpected character '-'" << endl;
- throw invalid_input ();
+ throw InvalidInput ();
}
break;
@@ -190,9 +192,11 @@ next ()
cerr << id_ << ':' << c.line () << ':' << c.column ()
<< ": error: unexpected character '" << c << "'" << endl;
+ throw InvalidInput ();
}
- catch (invalid_input const&)
+ catch (InvalidInput const&)
{
+ valid_ = false;
}
// Try to recover.
@@ -245,7 +249,7 @@ identifier (Char c)
{
cerr << id_ << ':' << c.line () << ':' << c.column () << ": error: "
<< "invalid character sequence '" << lexeme << "'" << endl;
- throw invalid_input ();
+ throw InvalidInput ();
}
}
@@ -302,7 +306,7 @@ char_literal (Char c)
{
cerr << id_ << ':' << c.line () << ':' << c.column () << ": error: "
<< "end of stream reached while reading character literal" << endl;
- throw invalid_input ();
+ throw InvalidInput ();
}
lexeme += c;
@@ -363,7 +367,7 @@ string_literal_trailer ()
{
cerr << id_ << ':' << c.line () << ':' << c.column () << ": error: "
<< "end of stream reached while reading string literal" << endl;
- throw invalid_input ();
+ throw InvalidInput ();
}
r += c;
@@ -400,7 +404,7 @@ path_literal (Char c)
{
cerr << id_ << ':' << c.line () << ':' << c.column () << ": error: "
<< "end of stream reached while reading path literal" << endl;
- throw invalid_input ();
+ throw InvalidInput ();
}
lexeme += c;
@@ -411,3 +415,82 @@ path_literal (Char c)
return Token (Token::t_path_lit, lexeme, ln, cl);
}
+
+// Scan a call expression: everything from the opening '(' held in c up
+// to and including the ')' that brings the parenthesis count back to
+// zero. Matching is done purely by counting, so every '(' and ')' read
+// from the stream takes part in the balance. The resulting t_call_expr
+// token carries the complete text and the position of the opening
+// parenthesis. Prints a diagnostic and throws InvalidInput if the
+// stream ends before the expression is closed.
+//
+Token Lexer::
+call_expression (Char c)
+{
+  size_t const line (c.line ());
+  size_t const column (c.column ());
+
+  string text;
+  text += c;
+
+  // depth is the number of parentheses currently open; c is the first.
+  //
+  for (size_t depth (1); depth != 0; )
+  {
+    c = get ();
+
+    if (is_eos (c))
+    {
+      cerr << id_ << ':' << c.line () << ':' << c.column () << ": error: "
+           << "end of stream reached while reading call expression" << endl;
+      throw InvalidInput ();
+    }
+
+    text += c;
+
+    switch (c)
+    {
+    case '(':
+      ++depth;
+      break;
+    case ')':
+      --depth;
+      break;
+    }
+  }
+
+  return Token (Token::t_call_expr, text, line, column);
+}
+
+// Scan a template expression: everything from the opening '<' held in c
+// up to and including the '>' that brings the angle bracket count back
+// to zero. Matching is done purely by counting, so every '<' and '>'
+// read from the stream takes part in the balance. The resulting
+// t_template_expr token carries the complete text and the position of
+// the opening bracket. Prints a diagnostic and throws InvalidInput if
+// the stream ends before the expression is closed.
+//
+Token Lexer::
+template_expression (Char c)
+{
+  size_t const line (c.line ());
+  size_t const column (c.column ());
+
+  string text;
+  text += c;
+
+  // depth is the number of angle brackets currently open; c is the first.
+  //
+  for (size_t depth (1); depth != 0; )
+  {
+    c = get ();
+
+    if (is_eos (c))
+    {
+      cerr << id_ << ':' << c.line () << ':' << c.column () << ": error: "
+           << "end of stream reached while reading template expression"
+           << endl;
+      throw InvalidInput ();
+    }
+
+    text += c;
+
+    switch (c)
+    {
+    case '<':
+      ++depth;
+      break;
+    case '>':
+      --depth;
+      break;
+    }
+  }
+
+  return Token (Token::t_template_expr, text, line, column);
+}
diff --git a/cli/lexer.hxx b/cli/lexer.hxx
index 1caceb3..50990c3 100644
--- a/cli/lexer.hxx
+++ b/cli/lexer.hxx
@@ -22,6 +22,9 @@ public:
Token
next ();
+ bool
+ valid () const;
+
protected:
class Char
{
@@ -56,7 +59,7 @@ protected:
peek ();
protected:
- class invalid_input {};
+ class InvalidInput {};
void
skip_spaces ();
@@ -82,6 +85,12 @@ protected:
Token
path_literal (Char);
+ Token
+ call_expression (Char);
+
+ Token
+ template_expression (Char);
+
protected:
bool
is_alpha (char c) const;
@@ -120,6 +129,7 @@ private:
bool eos_;
bool include_;
+ bool valid_;
};
#include "lexer.ixx"
diff --git a/cli/lexer.ixx b/cli/lexer.ixx
index f7ff77e..7e84cfc 100644
--- a/cli/lexer.ixx
+++ b/cli/lexer.ixx
@@ -38,6 +38,12 @@ column () const
// Lexer
//
+// Return the valid_ flag. The constructor initializes it to true and
+// next () sets it to false when it catches InvalidInput.
+//
inline bool Lexer::
+valid () const
+{
+ return valid_;
+}
+
+inline bool Lexer::
is_alpha (char c) const
{
return std::isalpha (c, loc_);
diff --git a/cli/token.hxx b/cli/token.hxx
index c12d097..08918f8 100644
--- a/cli/token.hxx
+++ b/cli/token.hxx
@@ -23,7 +23,9 @@ public:
t_char_lit,
t_bool_lit,
t_int_lit,
- t_float_lit
+ t_float_lit,
+ t_call_expr, // The so called "call expression", e.g., (2, a).
+ t_template_expr // The so called "template expression", e.g., <foo, 3>.
};
Type
@@ -52,9 +54,12 @@ public:
k_int,
k_long,
k_float,
- k_double
+ k_double,
+ k_invalid
};
+ // Return the keyword id if type is t_keyword and k_invalid otherwise.
+ //
Keyword
keyword () const;
@@ -71,15 +76,19 @@ public:
{
p_semi,
p_comma,
- p_colon,
+ p_dcolon,
p_lcbrace,
p_rcbrace,
- p_lparen,
- p_rparen,
+ // p_lparen,
+ // p_rparen,
p_eq,
- p_or
+ p_or,
+ p_invalid
};
+ // Return the punctuation id if type is t_punctuation and p_invalid
+ // otherwise.
+ //
Punctuation
punctuation () const;
@@ -89,6 +98,12 @@ public:
std::string const&
literal () const;
+ // Expressions.
+ //
+public:
+ std::string const&
+ expression () const;
+
// C-tors.
//
public:
@@ -99,7 +114,7 @@ public:
Token (Keyword k, std::size_t l, std::size_t c);
Token (Punctuation p, std::size_t l, std::size_t c);
- // Identifier & literals.
+ // Identifier, literals, and expressions.
//
Token (Type t, std::string const& s, std::size_t l, std::size_t c);
diff --git a/cli/token.ixx b/cli/token.ixx
index bcf040a..93db6d8 100644
--- a/cli/token.ixx
+++ b/cli/token.ixx
@@ -24,7 +24,7 @@ column () const
inline Token::Keyword Token::
keyword () const
{
- return keyword_;
+ return type_ == t_keyword ? keyword_ : k_invalid;
}
inline std::string const& Token::
@@ -36,7 +36,7 @@ identifier () const
inline Token::Punctuation Token::
punctuation () const
{
- return punctuation_;
+ return type_ == t_punctuation ? punctuation_ : p_invalid;
}
inline std::string const& Token::
@@ -45,6 +45,12 @@ literal () const
return str_;
}
+// Return the string stored in the token (str_), the same member that
+// literal () returns. The lexer test driver calls this for t_call_expr
+// and t_template_expr tokens.
+//
+inline std::string const& Token::
+expression () const
+{
+ return str_;
+}
+
inline Token::
Token (std::size_t l, std::size_t c)
: l_ (l), c_ (c), type_ (t_eos)
diff --git a/tests/lexer/driver.cxx b/tests/lexer/driver.cxx
index 62c236f..4b431eb 100644
--- a/tests/lexer/driver.cxx
+++ b/tests/lexer/driver.cxx
@@ -28,7 +28,7 @@ const char* keywords[] =
"double"
};
-const char punctuation[] = {';', ',', ':', '{', '}', '(', ')', '=', '|'};
+const char* punctuation[] = {";", ",", "::", "{", "}", /*"(", ")",*/ "=", "|"};
int main (int argc, char* argv[])
{
@@ -100,6 +100,16 @@ int main (int argc, char* argv[])
cout << t.literal () << endl;
break;
}
+ case Token::t_call_expr:
+ {
+ cout << t.expression () << endl;
+ break;
+ }
+ case Token::t_template_expr:
+ {
+ cout << t.expression () << endl;
+ break;
+ }
}
}
}
diff --git a/tests/lexer/makefile b/tests/lexer/makefile
index 3ecbae5..0764869 100644
--- a/tests/lexer/makefile
+++ b/tests/lexer/makefile
@@ -7,7 +7,7 @@ include $(dir $(lastword $(MAKEFILE_LIST)))../../build/bootstrap.make
cxx_tun := driver.cxx
-tests := 000 001 002 003 004
+tests := 000 001 002 003 004 005
#
#
diff --git a/tests/lexer/test-005.cli b/tests/lexer/test-005.cli
new file mode 100644
index 0000000..6085a33
--- /dev/null
+++ b/tests/lexer/test-005.cli
@@ -0,0 +1,2 @@
+(abc, 123 - 345, 12.34)
+<foo, bar::baz, 123*345>
diff --git a/tests/lexer/test-005.std b/tests/lexer/test-005.std
new file mode 100644
index 0000000..20e941b
--- /dev/null
+++ b/tests/lexer/test-005.std
@@ -0,0 +1,3 @@
+(abc, 123 - 345, 12.34)
+<foo, bar::baz, 123*345>
+<EOS>