From 8387a0b45df48cd99bcd62f81d175cde509dc091 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Sat, 22 Aug 2009 11:17:17 +0200 Subject: Add support for C and C++-style comments --- cli/lexer.cxx | 138 +++++++++++++++++++++++++++++++++++++---------- cli/lexer.hxx | 10 +++- tests/lexer/makefile | 2 +- tests/lexer/test-006.cli | 14 +++++ tests/lexer/test-006.std | 7 +++ 5 files changed, 140 insertions(+), 31 deletions(-) create mode 100644 tests/lexer/test-006.cli create mode 100644 tests/lexer/test-006.std diff --git a/cli/lexer.cxx b/cli/lexer.cxx index 6cf012d..dc57b21 100644 --- a/cli/lexer.cxx +++ b/cli/lexer.cxx @@ -18,7 +18,9 @@ Lexer (istream& is, string const& id) c_(1), eos_ (false), include_ (false), - valid_ (true) + valid_ (true), + buf_ (0, 0, 0), + unget_ (false) { keyword_map_["include"] = Token::k_include; keyword_map_["namespace"] = Token::k_namespace; @@ -36,48 +38,71 @@ Lexer (istream& is, string const& id) } Lexer::Char Lexer:: -get () +peek () { - // When is_.get () returns eof, the failbit is also set (stupid, - // isn't?) which may trigger an exception. To work around this - // we will call peek() first and only call get() if it is not - // eof. But we can only call peek() on eof once; any subsequent - // calls will spoil the failbit (even more stupid). - // - Char c (peek ()); - - if (!is_eos (c)) + if (unget_) + return buf_; + else { - is_.get (); - - if (c == '\n') + if (eos_) + return Char (Char::Traits::eof (), l_, c_); + else { - l_++; - c_ = 1; + Char::IntType i (is_.peek ()); + + if (i == Char::Traits::eof ()) + eos_ = true; + + return Char (i, l_, c_); } - else - c_++; } - - return c; } Lexer::Char Lexer:: -peek () +get () { - if (eos_) - return Char (Char::Traits::eof (), l_, c_); + if (unget_) + { + unget_ = false; + return buf_; + } else { - Char::IntType i (is_.peek ()); + // When is_.get () returns eof, the failbit is also set (stupid, + // isn't?) which may trigger an exception. To work around this + // we will call peek() first and only call get() if it is not + // eof. But we can only call peek() on eof once; any subsequent + // calls will spoil the failbit (even more stupid). + // + Char c (peek ()); + + if (!is_eos (c)) + { + is_.get (); - if (i == Char::Traits::eof ()) - eos_ = true; + if (c == '\n') + { + l_++; + c_ = 1; + } + else + c_++; + } - return Char (i, l_, c_); + return c; } } +void Lexer:: +unget (Char c) +{ + // Because iostream::unget cannot work once eos is reached, + // we have to provide our own implementation. + // + buf_ = c; + unget_ = true; +} + Token Lexer:: next () { @@ -214,8 +239,65 @@ next () void Lexer:: skip_spaces () { - for (Char c (peek ()); !is_eos (c) && is_space (c); c = peek ()) + for (Char c (peek ());; c = peek ()) + { + if (is_eos (c)) + break; + + if (c == '/') + { + c = get (); + Char p (peek ()); + + if (p == '/') + { + get (); + + // C++ comment. Read until newline or eos. + // + for (c = get (); !is_eos (c) && c != '\n'; c = get ()) ; + continue; + } + else if (p == '*') + { + get (); + + // C comment. + // + for (c = get ();; c = get ()) + { + if (is_eos (c)) + { + cerr << id_ << ':' << c.line () << ':' << c.column () + << ": error: end of stream reached while reading " + << "C-style comment" << endl; + throw InvalidInput (); + } + + if (c == '*') + { + c = peek (); + if (c == '/') + { + get (); + break; + } + } + } + continue; + } + else + { + unget (c); + break; + } + } + + if (!is_space (c)) + break; + get (); + } } Token Lexer:: diff --git a/cli/lexer.hxx b/cli/lexer.hxx index 50990c3..c69021f 100644 --- a/cli/lexer.hxx +++ b/cli/lexer.hxx @@ -53,10 +53,13 @@ protected: }; Char - get (); + peek (); Char - peek (); + get (); + + void + unget (Char); protected: class InvalidInput {}; @@ -130,6 +133,9 @@ private: bool eos_; bool include_; bool valid_; + + Char buf_; + bool unget_; }; #include "lexer.ixx" diff --git a/tests/lexer/makefile b/tests/lexer/makefile index 0764869..71d46f7 100644 --- a/tests/lexer/makefile +++ b/tests/lexer/makefile @@ -7,7 +7,7 @@ include $(dir $(lastword $(MAKEFILE_LIST)))../../build/bootstrap.make cxx_tun := driver.cxx -tests := 000 001 002 003 004 005 +tests := 000 001 002 003 004 005 006 # # diff --git a/tests/lexer/test-006.cli b/tests/lexer/test-006.cli new file mode 100644 index 0000000..706f0f2 --- /dev/null +++ b/tests/lexer/test-006.cli @@ -0,0 +1,14 @@ +// c++ comment ; +/* c comment ; */ +; +"a" // foo +"b" +"a" /* foo +bar +baz */ "b"; +- // aaa +5; +- /* a +a +a*/ 5 +// eos \ No newline at end of file diff --git a/tests/lexer/test-006.std b/tests/lexer/test-006.std new file mode 100644 index 0000000..eaa9964 --- /dev/null +++ b/tests/lexer/test-006.std @@ -0,0 +1,7 @@ +; +"a" "b" "a" "b" +; +-5 +; +-5 + -- cgit v1.1