summary refs log tree commit diff
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2009-08-22 11:17:17 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2009-08-22 11:17:17 +0200
commit 8387a0b45df48cd99bcd62f81d175cde509dc091 (patch)
tree 06988610fffc6c1b07336271a0ed57024554ff33
parent 2a24763c6fd6a75510ded0d030620aad1eba2b02 (diff)
Add support for C and C++-style comments
-rw-r--r-- cli/lexer.cxx 138
-rw-r--r-- cli/lexer.hxx 10
-rw-r--r-- tests/lexer/makefile 2
-rw-r--r-- tests/lexer/test-006.cli 14
-rw-r--r-- tests/lexer/test-006.std 7
5 files changed, 140 insertions, 31 deletions
diff --git a/cli/lexer.cxx b/cli/lexer.cxx
index 6cf012d..dc57b21 100644
--- a/cli/lexer.cxx
+++ b/cli/lexer.cxx
@@ -18,7 +18,9 @@ Lexer (istream& is, string const& id)
c_(1),
eos_ (false),
include_ (false),
- valid_ (true)
+ valid_ (true),
+ buf_ (0, 0, 0),
+ unget_ (false)
{
keyword_map_["include"] = Token::k_include;
keyword_map_["namespace"] = Token::k_namespace;
@@ -36,48 +38,71 @@ Lexer (istream& is, string const& id)
}
Lexer::Char Lexer::
-get ()
+peek ()
{
- // When is_.get () returns eof, the failbit is also set (stupid,
- // isn't?) which may trigger an exception. To work around this
- // we will call peek() first and only call get() if it is not
- // eof. But we can only call peek() on eof once; any subsequent
- // calls will spoil the failbit (even more stupid).
- //
- Char c (peek ());
-
- if (!is_eos (c))
+ if (unget_)
+ return buf_;
+ else
{
- is_.get ();
-
- if (c == '\n')
+ if (eos_)
+ return Char (Char::Traits::eof (), l_, c_);
+ else
{
- l_++;
- c_ = 1;
+ Char::IntType i (is_.peek ());
+
+ if (i == Char::Traits::eof ())
+ eos_ = true;
+
+ return Char (i, l_, c_);
}
- else
- c_++;
}
-
- return c;
}
Lexer::Char Lexer::
-peek ()
+get ()
{
- if (eos_)
- return Char (Char::Traits::eof (), l_, c_);
+ if (unget_)
+ {
+ unget_ = false;
+ return buf_;
+ }
else
{
- Char::IntType i (is_.peek ());
+ // When is_.get () returns eof, the failbit is also set (stupid,
+ // isn't?) which may trigger an exception. To work around this
+ // we will call peek() first and only call get() if it is not
+ // eof. But we can only call peek() on eof once; any subsequent
+ // calls will spoil the failbit (even more stupid).
+ //
+ Char c (peek ());
+
+ if (!is_eos (c))
+ {
+ is_.get ();
- if (i == Char::Traits::eof ())
- eos_ = true;
+ if (c == '\n')
+ {
+ l_++;
+ c_ = 1;
+ }
+ else
+ c_++;
+ }
- return Char (i, l_, c_);
+ return c;
}
}
+void Lexer::
+unget (Char c)
+{
+ // Because iostream::unget cannot work once eos is reached,
+ // we have to provide our own implementation.
+ //
+ buf_ = c;
+ unget_ = true;
+}
+
Token Lexer::
next ()
{
@@ -214,8 +239,65 @@ next ()
void Lexer::
skip_spaces ()
{
- for (Char c (peek ()); !is_eos (c) && is_space (c); c = peek ())
+ for (Char c (peek ());; c = peek ())
+ {
+ if (is_eos (c))
+ break;
+
+ if (c == '/')
+ {
+ c = get ();
+ Char p (peek ());
+
+ if (p == '/')
+ {
+ get ();
+
+ // C++ comment. Read until newline or eos.
+ //
+ for (c = get (); !is_eos (c) && c != '\n'; c = get ()) ;
+ continue;
+ }
+ else if (p == '*')
+ {
+ get ();
+
+ // C comment.
+ //
+ for (c = get ();; c = get ())
+ {
+ if (is_eos (c))
+ {
+ cerr << id_ << ':' << c.line () << ':' << c.column ()
+ << ": error: end of stream reached while reading "
+ << "C-style comment" << endl;
+ throw InvalidInput ();
+ }
+
+ if (c == '*')
+ {
+ c = peek ();
+ if (c == '/')
+ {
+ get ();
+ break;
+ }
+ }
+ }
+ continue;
+ }
+ else
+ {
+ unget (c);
+ break;
+ }
+ }
+
+ if (!is_space (c))
+ break;
+
get ();
+ }
}
Token Lexer::
diff --git a/cli/lexer.hxx b/cli/lexer.hxx
index 50990c3..c69021f 100644
--- a/cli/lexer.hxx
+++ b/cli/lexer.hxx
@@ -53,10 +53,13 @@ protected:
};
Char
- get ();
+ peek ();
Char
- peek ();
+ get ();
+
+ void
+ unget (Char);
protected:
class InvalidInput {};
@@ -130,6 +133,9 @@ private:
bool eos_;
bool include_;
bool valid_;
+
+ Char buf_;
+ bool unget_;
};
#include "lexer.ixx"
diff --git a/tests/lexer/makefile b/tests/lexer/makefile
index 0764869..71d46f7 100644
--- a/tests/lexer/makefile
+++ b/tests/lexer/makefile
@@ -7,7 +7,7 @@ include $(dir $(lastword $(MAKEFILE_LIST)))../../build/bootstrap.make
cxx_tun := driver.cxx
-tests := 000 001 002 003 004 005
+tests := 000 001 002 003 004 005 006
#
#
diff --git a/tests/lexer/test-006.cli b/tests/lexer/test-006.cli
new file mode 100644
index 0000000..706f0f2
--- /dev/null
+++ b/tests/lexer/test-006.cli
@@ -0,0 +1,14 @@
+// c++ comment ;
+/* c comment ; */
+;
+"a" // foo
+"b"
+"a" /* foo
+bar
+baz */ "b";
+- // aaa
+5;
+- /* a
+a
+a*/ 5
+// eos
\ No newline at end of file
diff --git a/tests/lexer/test-006.std b/tests/lexer/test-006.std
new file mode 100644
index 0000000..eaa9964
--- /dev/null
+++ b/tests/lexer/test-006.std
@@ -0,0 +1,7 @@
+;
+"a" "b" "a" "b"
+;
+-5
+;
+-5
+<EOS>