diff options
Diffstat (limited to 'cli/cli/parser.cxx')
-rw-r--r-- | cli/cli/parser.cxx | 1728 |
1 file changed, 1728 insertions, 0 deletions
diff --git a/cli/cli/parser.cxx b/cli/cli/parser.cxx new file mode 100644 index 0000000..4685edc --- /dev/null +++ b/cli/cli/parser.cxx @@ -0,0 +1,1728 @@ +// file : cli/parser.cxx +// author : Boris Kolpackov <boris@codesynthesis.com> +// license : MIT; see accompanying LICENSE file + +#ifndef _WIN32 +# include <unistd.h> // stat +# include <sys/types.h> // stat +# include <sys/stat.h> // stat +#else +# include <sys/types.h> // _stat +# include <sys/stat.h> // _stat(), S_I* + +# ifdef _MSC_VER // Unlikely to be fixed in newer versions. +# define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +# endif +#endif + +#include <fstream> +#include <sstream> +#include <iostream> + +#include <cli/token.hxx> +#include <cli/lexer.hxx> +#include <cli/parser.hxx> + +#include <cli/semantics.hxx> + +using namespace std; +using namespace semantics; + +// Check that the file exist without checking for permissions, etc. +// +inline static bool +file_exists (const path& p) +{ +#ifndef _WIN32 + struct stat s; + int r (stat (p.string ().c_str (), &s)); +#else + struct _stat s; + int r (_stat (p.string ().c_str (), &s)); +#endif + + return r == 0 && S_ISREG (s.st_mode); +} + +const char* keywords[] = +{ + "include", + "namespace", + "class", + "signed", + "unsigned", + "bool", + "char", + "wchar_t", + "short", + "int", + "long", + "float", + "double" +}; + +const char* punctuation[] = { + ";", ",", ":", "::", "{", "}", /*"(", ")",*/ "=", "|"}; + +// Output the token type and value in a format suitable for diagnostics. 
+// +std::ostream& +operator<< (std::ostream& os, token const& t) +{ + switch (t.type ()) + { + case token::t_eos: + { + os << "end-of-stream"; + break; + } + case token::t_keyword: + { + os << "keyword '" << keywords[t.keyword ()] << "'"; + break; + } + case token::t_identifier: + { + os << "identifier '" << t.identifier () << "'"; + break; + } + case token::t_punctuation: + { + os << "'" << punctuation[t.punctuation ()] << "'"; + break; + } + case token::t_cxx_path_lit: + { + os << "c++ path literal"; + break; + } + case token::t_cli_path_lit: + { + os << "cli path literal"; + break; + } + case token::t_string_lit: + { + os << "string literal"; + break; + } + case token::t_char_lit: + { + os << "char literal"; + break; + } + case token::t_bool_lit: + { + os << "bool literal"; + break; + } + case token::t_int_lit: + { + os << "integer literal"; + break; + } + case token::t_float_lit: + { + os << "floating point literal"; + break; + } + case token::t_call_expr: + { + os << "call expression"; + break; + } + case token::t_template_expr: + { + os << "template expression"; + break; + } + } + + return os; +} + +// RAII-style set new value on construction, restore old one on destruction. +// +template <typename T> +struct auto_restore +{ + auto_restore (T*& var, T* new_val = 0) + : var_ (var), old_val_ (var_) + { + if (new_val != 0) + var_ = new_val; + } + + void + set (T* new_val) {var_ = new_val;} + + ~auto_restore () {var_ = old_val_;} + +private: + T*& var_; + T* old_val_; +}; + + +void parser:: +recover (token& t) +{ + // Recover by skipping past next ';' or '}'. 
+ // + for (;; t = lexer_->next ()) + { + if (t.type () == token::t_eos) + break; + + token::punctuation_type p (t.punctuation ()); + + if (p == token::p_semi || p == token::p_rcbrace) + { + t = lexer_->next (); + break; + } + } +} + +unique_ptr<cli_unit> parser:: +parse (std::istream& is, path const& p) +{ + unique_ptr<cli_unit> unit (new cli_unit (p, 1, 1)); + + { + path ap (p); + ap.absolute (); + ap.normalize (); + include_map_[ap] = unit.get (); + } + + root_ = cur_ = unit.get (); + + lexer l (is, p.string ()); + lexer_ = &l; + + doc_count_ = 0; + + path_ = &p; + valid_ = true; + + def_unit (); + + if (!valid_ || !l.valid ()) + throw invalid_input (); + + return unit; +} + +void parser:: +def_unit () +{ + token t (lexer_->next ()); + + // include-decl-seq + // + for (token::keyword_type k (t.keyword ()); + k == token::k_include || k == token::k_source; + k = t.keyword ()) + { + try + { + if (k == token::k_include) + include_decl (); + else + source_decl (); + + t = lexer_->next (); + } + catch (error const&) + { + valid_ = false; + recover (t); + } + } + + auto_restore<scope> new_scope (scope_, cur_); + + // decl-seq + // + while (t.type () != token::t_eos) + { + try + { + if (t.keyword () == token::k_source) + { + try + { + source_decl (); + t = lexer_->next (); + } + catch (error const&) + { + valid_ = false; + recover (t); + } + + continue; + } + + if (decl (t)) + { + t = lexer_->next (); + continue; + } + + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected namespace, class, or documentation instead of " + << t << endl; + throw error (); + } + catch (error const&) + { + valid_ = false; + break; // Non-recoverable error. 
+ } + } +} + +void parser:: +source_decl () +{ + token t (lexer_->next ()); + + if (t.type () != token::t_cli_path_lit) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected cli path literal instead of " << t << endl; + throw error (); + } + + string const& l (t.literal ()); + bool q (l[0] == '"'); // Quote or braket include? + + path f; + try + { + f = path (string (l, 1, l.size () - 2)); + } + catch (const invalid_path& e) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "'" << e.path () << "' is not a valid filesystem path" << endl; + valid_ = false; + } + + if (valid_) + { + path p; + + // If this is a quote include, then include relative to the current + // file. + // + if (q) + { + p = path_->directory () / f; + p.normalize (); + } + // Otherwise search the include directories (-I). + // + else + { + for (paths::const_iterator i (include_paths_.begin ()); + i != include_paths_.end (); ++i) + { + p = *i / f; + p.normalize (); + + if (file_exists (p)) + break; + + p.clear (); + } + + if (p.empty ()) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": " + << "error: file '" << f << "' not found in any of the " + << "include search directories (-I)" << endl; + valid_ = false; + } + } + + if (valid_) + { + auto_restore<path const> new_path (path_, &p); + + ifstream ifs (p.string ().c_str ()); + if (ifs.is_open ()) + { + ifs.exceptions (ifstream::failbit | ifstream::badbit); + + try + { + lexer l (ifs, p.string ()); + auto_restore<lexer> new_lexer (lexer_, &l); + + def_unit (); + + if (!l.valid ()) + valid_ = false; + } + catch (std::ios_base::failure const&) + { + cerr << p << ": error: read failure" << endl; + valid_ = false; + } + } + else + { + cerr << p << ": error: unable to open in read mode" << endl; + valid_ = false; + } + } + } + + t = lexer_->next (); + + if (t.punctuation () != token::p_semi) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": 
error: " + << "expected ';' instead of " << t << endl; + throw error (); + } +} + +void parser:: +include_decl () +{ + token t (lexer_->next ()); + token::token_type tt (t.type ()); + + if (tt != token::t_cxx_path_lit && tt != token::t_cli_path_lit) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected path literal instead of " << t << endl; + throw error (); + } + + string const& l (t.literal ()); + includes::kind_type ik (l[0] == '<' ? includes::bracket : includes::quote); + + path f; + try + { + f = path (string (l, 1, l.size () - 2)); + } + catch (const invalid_path& e) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "'" << e.path () << "' is not a valid filesystem path" << endl; + valid_ = false; + } + + if (valid_) + { + if (tt == token::t_cxx_path_lit) + { + cxx_unit& n ( + root_->new_node<cxx_unit> (*path_, t.line (), t.column ())); + root_->new_edge<cxx_includes> (*cur_, n, ik, f); + } + else + { + path p; + // If this is a quote include, then include relative to the current + // file. + // + if (ik == includes::quote) + { + p = path_->directory () / f; + p.normalize (); + } + // Otherwise search the include directories (-I). + // + else + { + for (paths::const_iterator i (include_paths_.begin ()); + i != include_paths_.end (); ++i) + { + p = *i / f; + p.normalize (); + + if (file_exists (p)) + break; + + p.clear (); + } + + if (p.empty ()) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": " + << "error: file '" << f << "' not found in any of the " + << "include search directories (-I)" << endl; + valid_ = false; + } + } + + if (valid_) + { + // Detect and ignore multiple inclusions. 
+ // + path ap (p); + ap.absolute (); + ap.normalize (); + + include_map::iterator it (include_map_.find (ap)); + if (it == include_map_.end ()) + { + cli_unit& n (root_->new_node<cli_unit> (p, 1, 1)); + root_->new_edge<cli_includes> (*cur_, n, ik, f); + include_map_[ap] = &n; + + auto_restore<cli_unit> new_cur (cur_, &n); + auto_restore<path const> new_path (path_, &p); + + ifstream ifs (p.string ().c_str ()); + if (ifs.is_open ()) + { + ifs.exceptions (ifstream::failbit | ifstream::badbit); + + try + { + lexer l (ifs, p.string ()); + auto_restore<lexer> new_lexer (lexer_, &l); + + def_unit (); + + if (!l.valid ()) + valid_ = false; + } + catch (std::ios_base::failure const&) + { + cerr << p << ": error: read failure" << endl; + valid_ = false; + } + } + else + { + cerr << p << ": error: unable to open in read mode" << endl; + valid_ = false; + } + } + else + root_->new_edge<cli_includes> (*cur_, *it->second, ik, f); + } + } + } + + t = lexer_->next (); + + if (t.punctuation () != token::p_semi) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected ';' instead of " << t << endl; + throw error (); + } +} + +bool parser:: +decl (token& t) +{ + switch (t.type ()) + { + case token::t_keyword: + { + switch (t.keyword ()) + { + case token::k_namespace: + { + namespace_def (); + return true; + } + case token::k_class: + { + class_def (); + return true; + } + default: + break; + } + + break; + } + case token::t_punctuation: + { + if (t.punctuation () != token::p_lcbrace) + break; + } + // Fall through. + case token::t_string_lit: + { + scope_doc (t); + return true; + } + default: + break; + } + + return false; +} + +void parser:: +scope_doc (token& t) +{ + size_t ln (t.line ()), cl (t.column ()); + + // Use a counter to give scope-level docs unique names. We use a + // single counter throughout all units/scope because we could be + // reopening namespaces. 
+ // + if (t.type () == token::t_string_lit) + { + // string-literal + // + if (valid_) + { + // Enter each ""-enclosed string as a separate documentation + // entry, handle documentation variables. + // + const string& l (t.literal ()); + + char p ('\0'); + for (size_t b (0), e (1); e < l.size (); ++e) + { + if (l[e] == '"' && p != '\\') + { + string s (doc_string (l.c_str () + b, e - b + 1)); + + if (!s.empty ()) + { + doc& d (root_->new_node<doc> (*path_, ln, cl)); + + // See if this is a variable assignment: "\<var>=<val>". + // + size_t p (0); // '=' position. + if (s.size () >= 3 && s[0] == '\\' && s[1] != '\\') + { + for (p = 1; p != s.size (); ++p) + { + char c (s[p]); + + // Variable name should be a C identifier. + // + if (!(c == '_' || + ('a' <= c && c <= 'z') || + ('A' <= c && c <= 'Z') || + (p != 1 && '0' <= c && c <= '9'))) + break; + } + + if (p == s.size () || s[p] != '=' || p == 1) // Not a variable. + p = 0; + } + + if (p != 0) + { + root_->new_edge<names> ( + *scope_, d, "var: " + string (s, 1, p - 1)); + s = string (s, p + 1); + } + else + { + ostringstream os; + os << "doc: " << doc_count_++; + root_->new_edge<names> (*scope_, d, os.str ()); + } + + d.push_back (s); // move(). + } + + // If we have more, then make b point to the opening '"'. Second + // ++e in for() above will make e point to the character after it. + // + b = ++e; + continue; + } + + // We need to keep track of \\ escapings so we don't confuse + // them with \", as in \\". 
+ // + if (l[e] == '\\' && p == '\\') + p = '\0'; + else + p = l[e]; + } + } + } + else + { + // doc-string-seq + // + assert (t.punctuation () == token::p_lcbrace); + + doc* d (0); + if (valid_) + { + ostringstream os; + os << "doc: " << doc_count_++; + + d = &root_->new_node<doc> (*path_, ln, cl); + root_->new_edge<names> (*scope_, *d, os.str ()); + } + + for (t = lexer_->next ();; t = lexer_->next ()) + { + if (t.type () != token::t_string_lit) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected documentation string instead of " << t << endl; + throw error (); + } + + if (valid_) + d->push_back (doc_string (t.literal ().c_str (), + t.literal ().size ())); + + t = lexer_->next (); + + if (t.punctuation () != token::p_comma) + break; + } + + if (t.punctuation () != token::p_rcbrace) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected '}' instead of " << t << endl; + throw error (); + } + } +} + +void parser:: +namespace_def () +{ + token t (lexer_->next ()); + + if (t.type () != token::t_identifier) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected identifier instead of " << t << endl; + throw error (); + } + + auto_restore<scope> new_scope (scope_); + + if (valid_) + { + namespace_& n ( + root_->new_node<namespace_> (*path_, t.line (), t.column ())); + root_->new_edge<names> (*scope_, n, t.identifier ()); + new_scope.set (&n); + } + + t = lexer_->next (); + + if (t.punctuation () != token::p_lcbrace) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected '{' instead of " << t << endl; + throw error (); + } + + // decl-seq + // + t = lexer_->next (); + + while (decl (t)) + t = lexer_->next (); + + if (t.punctuation () != token::p_rcbrace) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected namespace, class, documentation, or '}' instead of " + << t << endl; + 
throw error (); + } +} + +void parser:: +class_def () +{ + token t (lexer_->next ()); + + if (t.type () != token::t_identifier) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected identifier instead of " << t << endl; + throw error (); + } + + class_* n (0); + if (valid_) + { + n = &root_->new_node<class_> (*path_, t.line (), t.column ()); + root_->new_edge<names> (*scope_, *n, t.identifier ()); + } + + t = lexer_->next (); + + // inheritance-spec + // + if (t.punctuation () == token::p_colon) + { + for (;;) + { + t = lexer_->next (); + size_t line (t.line ()), col (t.column ()); + + string name; + if (!qualified_name (t, name)) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected qualified name instead of " << t << endl; + throw error (); + } + + string ns; + + // If it is a fully-qualifed name, then start from the global namespace. + // Otherwise, from the current scope. + // + if (name[0] == ':') + name = string (name, 2, string::npos); + else + ns = scope_->fq_name (); + + if (class_* b = cur_->lookup<class_> (ns, name)) + root_->new_edge<inherits> (*n, *b); + else + { + cerr << *path_ << ':' << line << ':' << col << ": error: " + << "unable to resolve base class '" << name << "'" << endl; + valid_ = false; + } + + if (t.punctuation () != token::p_comma) + break; + } + } + + // abstract-spec + // + if (t.punctuation () == token::p_eq) + { + t = lexer_->next (); + + if (t.type () != token::t_int_lit || t.literal () != "0") + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected '0' instead of " << t << endl; + throw error (); + } + + if (n != 0) + n->abstract (true); + + t = lexer_->next (); + } + + if (t.punctuation () != token::p_lcbrace) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected '{' instead of " << t << endl; + throw error (); + } + + auto_restore<scope> new_scope (scope_, n); + + // 
class-decl-seq + // + t = lexer_->next (); + + for (;;) + { + try + { + if (t.type () == token::t_string_lit || + t.punctuation () == token::p_lcbrace) + { + scope_doc (t); + t = lexer_->next (); + } + else + { + if (!option_def (t)) + break; + } + } + catch (error const&) + { + valid_ = false; + recover (t); + } + } + + if (t.punctuation () != token::p_rcbrace) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected option, documentation, or '}' instead of " << t << endl; + throw error (); + } + + t = lexer_->next (); + + if (t.punctuation () != token::p_semi) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected ';' instead of " << t << endl; + throw error (); + } +} + +bool parser:: +option_def (token& t) +{ + size_t l (t.line ()), c (t.column ()); + + // type-spec + // + // These two functions set t to the next token if they return + // true. + // + string type_name; + + if (!qualified_name (t, type_name) && !fundamental_type (t, type_name)) + return false; + + option* o (0); + + if (valid_) + { + o = &root_->new_node<option> (*path_, l, c); + type& t (root_->new_type (*path_, l, c, type_name)); + root_->new_edge<belongs> (*o, t); + } + + // option-name-seq + // + names::name_list nl; + for (;;) + { + switch (t.type ()) + { + case token::t_identifier: + { + if (valid_) + nl.push_back (t.identifier ()); + + break; + } + case token::t_string_lit: + { + if (valid_) + { + // Get rid of '"'. + // + string r; + string const& l (t.literal ()); + char p ('\0'); + + for (size_t i (0), n (l.size ()); i < n; ++i) + { + if (l[i] == '"' && p != '\\') + continue; + + // We need to keep track of \\ escapings so we don't confuse + // them with \", as in "\\". 
+ // + if (l[i] == '\\' && p == '\\') + p = '\0'; + else + p = l[i]; + + r += l[i]; + } + + nl.push_back (r); + } + + break; + } + default: + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "option name expected instead of " << t << endl; + throw error (); + } + } + + t = lexer_->next (); + + if (t.punctuation () == token::p_or) + t = lexer_->next (); + else + break; + } + + if (valid_) + root_->new_edge<names> (*scope_, *o, nl); + + // initializer + // + std::string ev; + expression::expression_type et; + + if (t.punctuation () == token::p_eq) + { + // assignment initiaizer + // + t = lexer_->next (); + + l = t.line (); + c = t.column (); + + if (qualified_name (t, ev)) + { + et = expression::identifier; + } + else + { + switch (t.type ()) + { + case token::t_string_lit: + { + ev = t.literal (); + et = expression::string_lit; + t = lexer_->next (); + break; + } + case token::t_char_lit: + { + ev = t.literal (); + et = expression::char_lit; + t = lexer_->next (); + break; + } + case token::t_bool_lit: + { + ev = t.literal (); + et = expression::bool_lit; + t = lexer_->next (); + break; + } + case token::t_int_lit: + { + ev = t.literal (); + et = expression::int_lit; + t = lexer_->next (); + break; + } + case token::t_float_lit: + { + ev = t.literal (); + et = expression::float_lit; + t = lexer_->next (); + break; + } + case token::t_call_expr: + { + ev = t.expression (); + et = expression::call_expr; + t = lexer_->next (); + break; + } + default: + { + cerr << *path_ << ':' << t.line () << ':' << t.column () + << ": error: expected intializer instead of " << t << endl; + throw error (); + } + } + } + } + else if (t.type () == token::t_call_expr) + { + // c-tor initializer + // + l = t.line (); + c = t.column (); + + ev = t.expression (); + et = expression::call_expr; + t = lexer_->next (); + } + + if (valid_ && !ev.empty ()) + { + expression& e (root_->new_node<expression> (*path_, l, c, et, ev)); + root_->new_edge<initialized> (*o, 
e); + } + + // option-def-trailer + // + if (t.punctuation () == token::p_lcbrace) + { + // doc-string-seq + // + for (t = lexer_->next ();; t = lexer_->next ()) + { + if (t.type () != token::t_string_lit) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected documentation string instead of " << t << endl; + throw error (); + } + + if (valid_) + o->doc ().push_back (doc_string (t.literal ().c_str (), + t.literal ().size ())); + + t = lexer_->next (); + + if (t.punctuation () != token::p_comma) + break; + } + + if (t.punctuation () != token::p_rcbrace) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected '}' instead of " << t << endl; + throw error (); + } + + t = lexer_->next (); + + // Allow semicolon after option-doc for backwards compatibility. + // + if (t.punctuation () == token::p_semi) + t = lexer_->next (); + } + else + { + if (t.punctuation () != token::p_semi) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected ';' instead of " << t << endl; + throw error (); + } + + t = lexer_->next (); + } + + return true; +} + +string parser:: +doc_string (const char* l, size_t n) +{ + // Pass 1: get rid of " (as in "foo""bar"), convert \" to just ". + // + string t1, t2, t3; + char p ('\0'); // Previous character. + + for (size_t i (0); i < n; ++i) + { + char c (l[i]); + + if (c == '"') + { + if (p == '\\') + { + t1[t1.size () - 1] = '"'; // Replace \ with ". + p = c; + } + continue; + } + + // We need to keep track of \\ escapings so we don't confuse them with \", + // as in \\". + // + if (c == '\\' && p == '\\') + p = '\0'; + else + p = c; + + t1 += c; + } + + // Pass two: get rid of leading and trailing spaces in each line. Also + // handle pre-formatted fragments. + // + if (t1.size () != 0) + { + bool more (true); + size_t b (0), e, p; + + bool pre (false); + size_t m (0); // Number of leading spaces to remove in pre. 
+ + while (more) + { + p = e = t1.find ('\n', b); + + if (p == string::npos) + { + e = t1.size (); + more = false; + } + + if (b != e) // Unless this is just a single newline. + { + // In the pre mode we only remove up to m leading whitespaces. + // + { + size_t i (0); + while (b < e && + (t1[b] == 0x20 || t1[b] == 0x0D || t1[b] == 0x09) && + (!pre || i != m)) + { + ++b; + ++i; + } + + if (!pre) + m = i; + } + + --e; + while (e > b && (t1[e] == 0x20 || t1[e] == 0x0D || t1[e] == 0x09)) + --e; + + // Pre-formatted fragment marker or its escape. + // + if (t1[b] == '\\' && (b == e || (b + 1 == e && t1[e] == '\\'))) + { + // Use Start of Text (0x02) and End of Text (0x03) special + // characters as pre-formatted fragment markers. + // + if (b == e) + { + pre = !pre; + t2 += (pre ? 0x02 : 0x03); + } + else + t2 += "\\\\"; // Keep escaped. + } + else if (b <= e) + t2.append (t1, b, e - b + 1); + } + + if (more) + { + t2 += '\n'; + b = p + 1; + } + } + + if (pre) + { + cerr << *path_ << ": error: missing pre-formatted fragment end marker " + << "in documentation string '" << t1 << "'" << endl; + throw error (); + } + } + + // Pass 3: replace every single newline with single space and all multiple + // newlines (paragraph marker) with a single newline, unless we are in a + // pre-formatted fragment. Also process escapes in pre-formatted fragmens. + // + bool pre (false); + p = '\0'; // Previous character in pre-formatted fragment. + for (size_t i (0), n (t2.size ()); i < n; ++i) + { + char c (t2[i]); + + if (c == '\n' && !pre) + { + size_t j (i); + for (; i + 1 < n && t2[i + 1] == '\n'; ++i) ; + + if (j != 0 && i + 1 != n) // Strip leading and trailing newlines. + t3 += i != j ? '\n' : ' '; + } + else + { + if (c == (pre ? 0x03 : 0x02)) + { + pre = !pre; + + // Kill "inner" newlines (after opening and before closing '/' + // markers). Also check for "outer" newlines so that we always + // have paragraph separation. 
+ // + size_t k (t3.size ()); + if (pre) + { + if (k != 0 && t3[k - 1] != '\n') // Outer. + { + cerr << *path_ << ": error: missing empty line before pre-" + << "formatted fragment start marker in documentation " + << "string '" << t1 << "'" << endl; + throw error (); + } + + ++i; // Skip inner. + } + else + { + if (t3[k - 1] == '\n') // Could be the same as opening if empty. + t3.resize (k - 1); // Pop inner. + + if (i + 2 < n && (t2[i + 1] != '\n' || t2[i + 2] != '\n')) // Outer. + { + cerr << *path_ << ": error: missing empty line after pre-" + << "formatted fragment end marker in documentation " + << "string '" << t1 << "'" << endl; + throw error (); + } + } + + t3 += c; + continue; + } + + if (pre) + { + // In the pre-formatted fragments the only two escapes that we + // recognize are \" which was handled on pass 1 above and \\ which we + // handle here. + // + if (c == '\\' && p == '\\') + { + p = '\0'; // Keep the already added and clear. + continue; + } + + p = c; + } + + t3 += c; + } + } + + return t3; +} + + +bool parser:: +qualified_name (token& t, string& r) +{ + if (t.type () != token::t_identifier && t.punctuation () != token::p_dcolon) + return false; + + r.clear (); + + if (t.punctuation () == token::p_dcolon) + { + r += "::"; + t = lexer_->next (); + } + + for (;;) + { + if (t.type () != token::t_identifier) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected identifier after '::'" << endl; + throw error (); + } + + r += t.identifier (); + t = lexer_->next (); + + if (t.type () == token::t_template_expr) + { + // Template-id. + // + r += t.expression (); + t = lexer_->next (); + } + + if (t.punctuation () == token::p_dcolon) + { + r += "::"; + t = lexer_->next (); + } + else + break; + } + + return true; +} + +bool parser:: +fundamental_type (token& t, string& r) +{ + r.clear (); + + switch (t.keyword ()) + { + case token::k_signed: + case token::k_unsigned: + { + r = t.keyword () == token::k_signed ? 
"signed" : "unsigned"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_short: + { + r += " short"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_int: + { + r += " int"; + t = lexer_->next (); + } + default: + break; + } + break; + } + case token::k_long: + { + r += " long"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_int: + { + r += " int"; + t = lexer_->next (); + break; + } + case token::k_long: + { + r += " long"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_int: + { + r += " int"; + t = lexer_->next (); + } + default: + break; + } + break; + } + default: + break; + } + break; + } + case token::k_int: + { + r += " int"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_short: + { + r += " short"; + t = lexer_->next (); + break; + } + case token::k_long: + { + r += " long"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_long: + { + r += " long"; + t = lexer_->next (); + } + default: + break; + } + break; + } + default: + break; + } + break; + } + case token::k_char: + { + r += " char"; + t = lexer_->next (); + break; + } + default: + break; + } + break; + } + case token::k_short: + case token::k_long: + { + bool l (t.keyword () == token::k_long); + r = l ? "long" : "short"; + + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_signed: + case token::k_unsigned: + { + r += t.keyword () == token::k_signed ? " signed" : " unsigned"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_int: + { + r += " int"; + t = lexer_->next (); + } + default: + break; + } + break; + } + case token::k_long: + { + r += " long"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_signed: + case token::k_unsigned: + { + r += t.keyword () == token::k_signed ? 
" signed" : " unsigned"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_int: + { + r += " int"; + t = lexer_->next (); + } + default: + break; + } + break; + } + case token::k_int: + { + r += " int"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_signed: + { + r += " signed"; + t = lexer_->next (); + break; + } + case token::k_unsigned: + { + r += " unsigned"; + t = lexer_->next (); + break; + } + default: + break; + } + break; + } + default: + break; + } + break; + } + case token::k_int: + { + r += " int"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_signed: + { + r += " signed"; + t = lexer_->next (); + break; + } + case token::k_unsigned: + { + r += " unsigned"; + t = lexer_->next (); + break; + } + default: + break; + } + break; + } + case token::k_double: + { + if (l) + { + r += " double"; + t = lexer_->next (); + } + break; + } + default: + break; + } + break; + } + case token::k_int: + { + r = "int"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_signed: + case token::k_unsigned: + { + r += t.keyword () == token::k_signed ? 
" signed" : " unsigned"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_short: + { + r += " short"; + t = lexer_->next (); + break; + } + case token::k_long: + { + r += " long"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_long: + { + r += " long"; + t = lexer_->next (); + } + default: + break; + } + } + default: + break; + } + break; + } + case token::k_short: + { + r += " short"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_signed: + { + r += " signed"; + t = lexer_->next (); + break; + } + case token::k_unsigned: + { + r += " unsigned"; + t = lexer_->next (); + break; + } + default: + break; + } + break; + } + case token::k_long: + { + r += " long"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_signed: + { + r += " signed"; + t = lexer_->next (); + break; + } + case token::k_unsigned: + { + r += " unsigned"; + t = lexer_->next (); + break; + } + case token::k_long: + { + r += " long"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_signed: + { + r += " signed"; + t = lexer_->next (); + break; + } + case token::k_unsigned: + { + r += " unsigned"; + t = lexer_->next (); + break; + } + default: + break; + } + break; + } + default: + break; + } + break; + } + default: + break; + } + break; + } + case token::k_char: + { + r = "char"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_signed: + { + r += " signed"; + t = lexer_->next (); + break; + } + case token::k_unsigned: + { + r += " unsigned"; + t = lexer_->next (); + break; + } + default: + break; + } + break; + } + case token::k_bool: + { + r = "bool"; + t = lexer_->next (); + break; + } + case token::k_wchar: + { + r = "wchar_t"; + t = lexer_->next (); + break; + } + case token::k_float: + { + r = "float"; + t = lexer_->next (); + break; + } + case token::k_double: + { + r = "double"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_long: + { + r += " long"; + t = lexer_->next (); + } + default: 
+ break; + } + break; + } + default: + return false; + } + + return true; +} |