author    Karen Arutyunov <karen@codesynthesis.com>  2020-04-08 14:51:57 +0300
committer Karen Arutyunov <karen@codesynthesis.com>  2020-04-27 11:38:53 +0300
commit    720c5a33b6a49cf328fdd7611f49153cf8f60247 (patch)
tree      9725f3d1f42ec90fde84520f49647edea013ce5e /cli/lexer.cxx
parent    3183f3bb927a90783ae0aeaf190a0919377aabe4 (diff)
Separate tests and examples into individual packages
Also make the cli module be explicitly enabled via the config.cli configuration variable.
Diffstat (limited to 'cli/lexer.cxx')
-rw-r--r--  cli/lexer.cxx  604
1 file changed, 0 insertions(+), 604 deletions(-)
diff --git a/cli/lexer.cxx b/cli/lexer.cxx
deleted file mode 100644
index 573c76b..0000000
--- a/cli/lexer.cxx
+++ /dev/null
@@ -1,604 +0,0 @@
-// file : cli/lexer.cxx
-// author : Boris Kolpackov <boris@codesynthesis.com>
-// license : MIT; see accompanying LICENSE file
-
-#include <iostream>
-
-#include <cli/lexer.hxx>
-
-using namespace std;
-
-lexer::
-lexer (istream& is, string const& id)
- : loc_ ("C"),
- is_ (is),
- id_ (id),
- l_ (1),
- c_ (1),
- eos_ (false),
- include_ (false),
- valid_ (true),
- buf_ (0, 0, 0),
- unget_ (false)
-{
- keyword_map_["source"] = token::k_source;
- keyword_map_["include"] = token::k_include;
- keyword_map_["namespace"] = token::k_namespace;
- keyword_map_["class"] = token::k_class;
- keyword_map_["signed"] = token::k_signed;
- keyword_map_["unsigned"] = token::k_unsigned;
- keyword_map_["bool"] = token::k_bool;
- keyword_map_["char"] = token::k_char;
- keyword_map_["wchar_t"] = token::k_wchar;
- keyword_map_["short"] = token::k_short;
- keyword_map_["int"] = token::k_int;
- keyword_map_["long"] = token::k_long;
- keyword_map_["float"] = token::k_float;
- keyword_map_["double"] = token::k_double;
-}
-
-lexer::xchar lexer::
-peek ()
-{
- if (unget_)
- return buf_;
- else
- {
- if (eos_)
- return xchar (xchar::traits_type::eof (), l_, c_);
- else
- {
- xchar::int_type i (is_.peek ());
-
- if (i == xchar::traits_type::eof ())
- eos_ = true;
-
- return xchar (i, l_, c_);
- }
- }
-}
-
-lexer::xchar lexer::
-get ()
-{
- if (unget_)
- {
- unget_ = false;
- return buf_;
- }
- else
- {
- // When is_.get () returns eof, the failbit is also set (stupid,
- // isn't it?), which may trigger an exception. To work around this
- // we call peek () first and only call get () if it is not eof.
- // But we can only call peek () at eof once; any subsequent call
- // will also set the failbit (even more stupid).
- //
- xchar c (peek ());
-
- if (!is_eos (c))
- {
- is_.get ();
-
- if (c == '\n')
- {
- l_++;
- c_ = 1;
- }
- else
- c_++;
- }
-
- return c;
- }
-}
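
The peek-before-get dance above matters when the stream has exceptions enabled for failbit (as the comment notes, get () at end of stream also sets failbit), and the eos_ cache exists because a second peek () at eof would also set failbit. A minimal standalone sketch of the same idiom, not part of the original file, assuming a stream with failbit exceptions enabled:

#include <cstddef>
#include <iostream>
#include <istream>
#include <sstream>

// Count characters using peek-before-get so that reaching the end of
// the stream never trips the failbit exception.
//
static std::size_t
count_chars (std::istream& is)
{
  std::size_t n (0);

  // Peek first: at end of stream peek () only sets eofbit, while
  // get () would also set failbit and therefore throw.
  //
  while (is.peek () != std::istream::traits_type::eof ())
  {
    is.get ();
    n++;
  }

  return n;
}

int
main ()
{
  std::istringstream is ("abc");
  is.exceptions (std::istream::failbit | std::istream::badbit);
  std::cout << count_chars (is) << std::endl; // Prints 3.
}
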
-
-void lexer::
-unget (xchar c)
-{
- // Because iostream::unget cannot work once eos is reached,
- // we have to provide our own implementation.
- //
- buf_ = c;
- unget_ = true;
-}
-
-token lexer::
-next ()
-{
- while (true) // Recovery loop.
- {
- bool include (include_);
- include_ = false;
-
- skip_spaces ();
-
- xchar c (get ());
-
- if (is_eos (c))
- return token (c.line (), c.column ());
-
- try
- {
- switch (c)
- {
- case '\'':
- {
- return char_literal (c);
- }
- case '\"':
- {
- if (include)
- return path_literal (c);
- else
- return string_literal (c);
- }
- case '<':
- {
- if (include)
- return path_literal (c);
- else
- return template_expression (c);
- }
- case ';':
- {
- return token (token::p_semi, c.line (), c.column ());
- }
- case ',':
- {
- return token (token::p_comma, c.line (), c.column ());
- }
- case ':':
- {
- if (peek () == ':')
- {
- get ();
- return token (token::p_dcolon, c.line (), c.column ());
- }
-
- return token (token::p_colon, c.line (), c.column ());
- }
- case '{':
- {
- return token (token::p_lcbrace, c.line (), c.column ());
- }
- case '}':
- {
- return token (token::p_rcbrace, c.line (), c.column ());
- }
- case '(':
- {
- return call_expression (c);
- }
- case '=':
- {
- return token (token::p_eq, c.line (), c.column ());
- }
- case '|':
- {
- return token (token::p_or, c.line (), c.column ());
- }
- case '-':
- {
- // This can be the beginning of an identifier or an integer
- // literal. Figure out which one it is.
- //
- xchar p (peek ());
-
- if (is_dec_digit (p))
- return int_literal (get (), true, c.line (), c.column ());
- else if (is_space (p))
- {
- skip_spaces ();
- p = peek ();
-
- if (is_dec_digit (p))
- return int_literal (get (), true, c.line (), c.column ());
-
- // Stray '-'.
- //
- cerr << id_ << ':' << c.line () << ':' << c.column ()
- << ": error: unexpected character '-'" << endl;
- throw invalid_input ();
- }
-
- break;
- }
- }
-
- if (is_alpha (c) || c == '_' || c == '-' || c == '/')
- {
- return identifier (c);
- }
-
- if (is_dec_digit (c))
- {
- return int_literal (c);
- }
-
- cerr << id_ << ':' << c.line () << ':' << c.column ()
- << ": error: unexpected character '" << c << "'" << endl;
- throw invalid_input ();
- }
- catch (invalid_input const&)
- {
- valid_ = false;
- }
-
- // Try to recover.
- //
- do
- {
- c = get ();
-
- if (is_eos (c))
- return token (c.line (), c.column ());
- } while (c != ';');
- }
-}
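
The catch block above implements simple panic-mode recovery: the lexer marks itself invalid and discards input up to and including the next ';' before trying again. A minimal sketch of that skip over a plain string (the function name is illustrative, not part of the lexer):

#include <iostream>
#include <string>

// Skip everything up to and including the next ';' starting at i,
// returning the position of the first character after it, or
// std::string::npos if no ';' remains (the end-of-stream case).
//
static std::string::size_type
recover (const std::string& s, std::string::size_type i)
{
  std::string::size_type p (s.find (';', i));
  return p == std::string::npos ? std::string::npos : p + 1;
}

int
main ()
{
  std::string s ("@#! bad token ; class c {};");
  std::string::size_type p (recover (s, 0));

  if (p != std::string::npos)
    std::cout << s.substr (p) << std::endl; // Prints " class c {};".
}
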
-
-void lexer::
-skip_spaces ()
-{
- for (xchar c (peek ());; c = peek ())
- {
- if (is_eos (c))
- break;
-
- if (c == '/')
- {
- c = get ();
- xchar p (peek ());
-
- if (p == '/')
- {
- get ();
-
- // C++ comment. Read until newline or eos.
- //
- for (c = get (); !is_eos (c) && c != '\n'; c = get ()) ;
- continue;
- }
- else if (p == '*')
- {
- get ();
-
- // C comment.
- //
- for (c = get ();; c = get ())
- {
- if (is_eos (c))
- {
- cerr << id_ << ':' << c.line () << ':' << c.column ()
- << ": error: end of stream reached while reading "
- << "C-style comment" << endl;
- throw invalid_input ();
- }
-
- if (c == '*')
- {
- c = peek ();
- if (c == '/')
- {
- get ();
- break;
- }
- }
- }
- continue;
- }
- else
- {
- unget (c);
- break;
- }
- }
-
- if (!is_space (c))
- break;
-
- get ();
- }
-}
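
skip_spaces above folds three jobs into one loop: whitespace, C++ comments, and C-style comments (erroring on an unterminated one). The same logic is easier to see without the stream and unget machinery; a standalone sketch over a string, with illustrative names:

#include <cctype>
#include <cstddef>
#include <string>

// Skip whitespace and C/C++ comments starting at i, returning the
// index of the first significant character (or s.size ()). Unlike
// the lexer above, an unterminated C comment here simply runs to the
// end of the string instead of being diagnosed.
//
static std::size_t
skip_blanks (const std::string& s, std::size_t i)
{
  while (i < s.size ())
  {
    if (std::isspace (static_cast<unsigned char> (s[i])))
      i++;
    else if (s.compare (i, 2, "//") == 0)
    {
      // C++ comment: read until newline or end of input.
      //
      i = s.find ('\n', i);
      if (i == std::string::npos)
        return s.size ();
    }
    else if (s.compare (i, 2, "/*") == 0)
    {
      // C comment: read until the closing "*/".
      //
      i = s.find ("*/", i + 2);
      i = i == std::string::npos ? s.size () : i + 2;
    }
    else
      break;
  }

  return i;
}
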
-
-token lexer::
-identifier (xchar c)
-{
- size_t ln (c.line ()), cl (c.column ());
- string lexeme;
- lexeme += c;
-
- bool check (c == '-' || c == '/');
-
- for (c = peek ();
- !is_eos (c) && (is_alnum (c) || c == '_' || c == '-');
- c = peek ())
- {
- get ();
- lexeme += c;
- }
-
- // Check for invalid identifiers.
- //
- if (check)
- {
- size_t i (1);
-
- for (; i < lexeme.size (); ++i)
- if (is_alnum (lexeme[i]) || lexeme[i] == '_')
- break;
-
- if (i == lexeme.size ())
- {
- cerr << id_ << ':' << c.line () << ':' << c.column () << ": error: "
- << "invalid character sequence '" << lexeme << "'" << endl;
- throw invalid_input ();
- }
- }
-
- keyword_map::const_iterator i (keyword_map_.find (lexeme));
-
- if (i != keyword_map_.end ())
- {
- if (i->second == token::k_include || i->second == token::k_source)
- include_ = true;
-
- return token (i->second, ln, cl);
- }
-
- if (lexeme == "true" || lexeme == "false")
- return token (token::t_bool_lit, lexeme, ln, cl);
-
- return token (token::t_identifier, lexeme, ln, cl);
-}
-
-token lexer::
-int_literal (xchar c, bool neg, size_t ml, size_t mc)
-{
- size_t ln (neg ? ml : c.line ()), cl (neg ? mc : c.column ());
- string lexeme;
-
- if (neg)
- lexeme += '-';
-
- lexeme += c;
-
- for (c = peek (); !is_eos (c) && is_dec_digit (c); c = peek ())
- {
- get ();
- lexeme += c;
- }
-
- return token (token::t_int_lit, lexeme, ln, cl);
-}
-
-token lexer::
-char_literal (xchar c)
-{
- size_t ln (c.line ()), cl (c.column ());
- string lexeme;
- lexeme += c;
-
- char p (c);
-
- while (true)
- {
- c = get ();
-
- if (is_eos (c))
- {
- cerr << id_ << ':' << c.line () << ':' << c.column () << ": error: "
- << "end of stream reached while reading character literal" << endl;
- throw invalid_input ();
- }
-
- lexeme += c;
-
- if (c == '\'' && p != '\\')
- break;
-
- // We need to keep track of '\\' escape sequences so we don't
- // confuse them with an escaped quote, as in '\\'.
- //
- if (c == '\\' && p == '\\')
- p = '\0';
- else
- p = c;
- }
-
- return token (token::t_char_lit, lexeme, ln, cl);
-}
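
The previous-character bookkeeping above (resetting p after a backslash pair) is what lets a literal like '\\' terminate correctly instead of its second backslash appearing to escape the closing quote. The same scan, sketched standalone over an in-memory string with the opening quote already consumed (names are illustrative):

#include <cstddef>
#include <string>

// Return the index just past the closing quote q, honoring backslash
// escapes the same way the lexer above does, or std::string::npos if
// the literal is unterminated.
//
static std::size_t
skip_quoted (const std::string& s, std::size_t i, char q)
{
  char p ('\0'); // Previous significant character.

  for (; i < s.size (); ++i)
  {
    char c (s[i]);

    if (c == q && p != '\\')
      return i + 1;

    // A backslash pair is a complete escape; reset p so the second
    // backslash cannot appear to escape the closing quote.
    //
    p = (c == '\\' && p == '\\') ? '\0' : c;
  }

  return std::string::npos;
}
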
-
-token lexer::
-string_literal (xchar c)
-{
- size_t ln (c.line ()), cl (c.column ());
- string lexeme;
- lexeme += c;
-
- while (true)
- {
- lexeme += string_literal_trailer ();
-
- // Check if there are more strings.
- //
- skip_spaces ();
-
- c = peek ();
-
- if (is_eos (c) || c != '"')
- break;
-
- get ();
- lexeme += "\"";
- }
-
- return token (token::t_string_lit, lexeme, ln, cl);
-}
-
-string lexer::
-string_literal_trailer ()
-{
- string r;
- char p ('\0');
-
- while (true)
- {
- xchar c = get ();
-
- if (is_eos (c))
- {
- cerr << id_ << ':' << c.line () << ':' << c.column () << ": error: "
- << "end of stream reached while reading string literal" << endl;
- throw invalid_input ();
- }
-
- r += c;
-
- if (c == '"' && p != '\\')
- break;
-
- // We need to keep track of '\\' escape sequences so we don't
- // confuse them with an escaped quote, as in "\\".
- //
- if (c == '\\' && p == '\\')
- p = '\0';
- else
- p = c;
- }
-
- return r;
-}
-
-token lexer::
-path_literal (xchar c)
-{
- size_t ln (c.line ()), cl (c.column ());
- string lexeme;
- lexeme += c;
-
- char end (c == '<' ? '>' : '"');
-
- while (true)
- {
- c = get ();
-
- if (is_eos (c))
- {
- cerr << id_ << ':' << c.line () << ':' << c.column () << ": error: "
- << "end of stream reached while reading path literal" << endl;
- throw invalid_input ();
- }
-
- lexeme += c;
-
- if (c == end)
- break;
- }
-
- token::token_type tt;
-
- if (lexeme.compare (1, 4, "c++:") == 0)
- {
- tt = token::t_cxx_path_lit;
- lexeme = lexeme[0] + string (lexeme, 5, string::npos);
- }
- else if (lexeme.compare (1, 4, "cli:") == 0)
- {
- tt = token::t_cli_path_lit;
- lexeme = lexeme[0] + string (lexeme, 5, string::npos);
- }
- else
- {
- // See if the path ends with .cli. If not, then we assume this is
- // a C++ inclusion.
- //
- size_t n (lexeme.size ());
-
- if (n > 5 && lexeme.compare (n - 5, 4, ".cli") == 0)
- tt = token::t_cli_path_lit;
- else
- tt = token::t_cxx_path_lit;
- }
-
- return token (tt, lexeme, ln, cl);
-}
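
The classification above gives the author explicit control: a "c++:" prefix forces a C++ inclusion, a "cli:" prefix forces a cli inclusion (both prefixes are stripped), and otherwise the .cli extension decides. A compact standalone restatement with a few made-up example paths:

#include <iostream>
#include <string>

enum path_kind {cxx_path, cli_path};

// lexeme includes the surrounding quotes or angle brackets, as in the
// lexer above; an explicit "c++:" or "cli:" prefix is stripped.
//
static path_kind
classify (std::string& lexeme)
{
  if (lexeme.compare (1, 4, "c++:") == 0)
  {
    lexeme = lexeme[0] + std::string (lexeme, 5, std::string::npos);
    return cxx_path;
  }

  if (lexeme.compare (1, 4, "cli:") == 0)
  {
    lexeme = lexeme[0] + std::string (lexeme, 5, std::string::npos);
    return cli_path;
  }

  // No prefix: a .cli extension means a cli inclusion, anything else
  // is assumed to be C++.
  //
  std::string::size_type n (lexeme.size ());
  return n > 5 && lexeme.compare (n - 5, 4, ".cli") == 0
    ? cli_path
    : cxx_path;
}

int
main ()
{
  std::string a ("<vector>"), b ("\"options.cli\""), c ("\"c++:config.h\"");

  std::cout << (classify (a) == cxx_path) << std::endl; // 1
  std::cout << (classify (b) == cli_path) << std::endl; // 1
  std::cout << (classify (c) == cxx_path) << std::endl; // 1 (prefix stripped from c)
}
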
-
-token lexer::
-call_expression (xchar c)
-{
- size_t ln (c.line ()), cl (c.column ());
- string lexeme;
- lexeme += c;
- size_t balance (1);
-
- while (balance != 0)
- {
- c = get ();
-
- if (is_eos (c))
- {
- cerr << id_ << ':' << c.line () << ':' << c.column () << ": error: "
- << "end of stream reached while reading call expression" << endl;
- throw invalid_input ();
- }
-
- lexeme += c;
-
- switch (c)
- {
- case '(':
- {
- balance++;
- break;
- }
- case ')':
- {
- balance--;
- break;
- }
- }
- }
-
- return token (token::t_call_expr, lexeme, ln, cl);
-}
-
-token lexer::
-template_expression (xchar c)
-{
- size_t ln (c.line ()), cl (c.column ());
- string lexeme;
- lexeme += c;
- size_t balance (1);
-
- while (balance != 0)
- {
- c = get ();
-
- if (is_eos (c))
- {
- cerr << id_ << ':' << c.line () << ':' << c.column () << ": error: "
- << "end of stream reached while reading template expression"
- << endl;
- throw invalid_input ();
- }
-
- lexeme += c;
-
- switch (c)
- {
- case '<':
- {
- balance++;
- break;
- }
- case '>':
- {
- balance--;
- break;
- }
- }
- }
-
- return token (token::t_template_expr, lexeme, ln, cl);
-}
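
call_expression and template_expression above share one technique: capture the input verbatim, delimiters included, while counting nesting depth and stopping when the balance returns to zero. A standalone sketch of that scan, assuming the opening delimiter has already been consumed:

#include <cstddef>
#include <string>

// Return the index just past the delimiter that closes an already
// consumed opening one, or std::string::npos if the input ends while
// the expression is still open (the end-of-stream error case above).
//
static std::size_t
skip_balanced (const std::string& s, std::size_t i, char open, char close)
{
  std::size_t balance (1);

  for (; i < s.size (); ++i)
  {
    if (s[i] == open)
      balance++;
    else if (s[i] == close && --balance == 0)
      return i + 1;
  }

  return std::string::npos;
}

Note that any closing character adjusts the balance, so a '>' that belongs to a shift operator inside a template expression would end the scan early; the lexer keeps the scan simple rather than parsing the enclosed C++.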