From 720c5a33b6a49cf328fdd7611f49153cf8f60247 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Wed, 8 Apr 2020 14:51:57 +0300 Subject: Separate tests and examples into individual packages Also make cli module to be explicitly enabled via the config.cli configuration variable. --- cli/parser.cxx | 1728 -------------------------------------------------------- 1 file changed, 1728 deletions(-) delete mode 100644 cli/parser.cxx (limited to 'cli/parser.cxx') diff --git a/cli/parser.cxx b/cli/parser.cxx deleted file mode 100644 index 4685edc..0000000 --- a/cli/parser.cxx +++ /dev/null @@ -1,1728 +0,0 @@ -// file : cli/parser.cxx -// author : Boris Kolpackov -// license : MIT; see accompanying LICENSE file - -#ifndef _WIN32 -# include // stat -# include // stat -# include // stat -#else -# include // _stat -# include // _stat(), S_I* - -# ifdef _MSC_VER // Unlikely to be fixed in newer versions. -# define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) -# endif -#endif - -#include -#include -#include - -#include -#include -#include - -#include - -using namespace std; -using namespace semantics; - -// Check that the file exist without checking for permissions, etc. -// -inline static bool -file_exists (const path& p) -{ -#ifndef _WIN32 - struct stat s; - int r (stat (p.string ().c_str (), &s)); -#else - struct _stat s; - int r (_stat (p.string ().c_str (), &s)); -#endif - - return r == 0 && S_ISREG (s.st_mode); -} - -const char* keywords[] = -{ - "include", - "namespace", - "class", - "signed", - "unsigned", - "bool", - "char", - "wchar_t", - "short", - "int", - "long", - "float", - "double" -}; - -const char* punctuation[] = { - ";", ",", ":", "::", "{", "}", /*"(", ")",*/ "=", "|"}; - -// Output the token type and value in a format suitable for diagnostics. -// -std::ostream& -operator<< (std::ostream& os, token const& t) -{ - switch (t.type ()) - { - case token::t_eos: - { - os << "end-of-stream"; - break; - } - case token::t_keyword: - { - os << "keyword '" << keywords[t.keyword ()] << "'"; - break; - } - case token::t_identifier: - { - os << "identifier '" << t.identifier () << "'"; - break; - } - case token::t_punctuation: - { - os << "'" << punctuation[t.punctuation ()] << "'"; - break; - } - case token::t_cxx_path_lit: - { - os << "c++ path literal"; - break; - } - case token::t_cli_path_lit: - { - os << "cli path literal"; - break; - } - case token::t_string_lit: - { - os << "string literal"; - break; - } - case token::t_char_lit: - { - os << "char literal"; - break; - } - case token::t_bool_lit: - { - os << "bool literal"; - break; - } - case token::t_int_lit: - { - os << "integer literal"; - break; - } - case token::t_float_lit: - { - os << "floating point literal"; - break; - } - case token::t_call_expr: - { - os << "call expression"; - break; - } - case token::t_template_expr: - { - os << "template expression"; - break; - } - } - - return os; -} - -// RAII-style set new value on construction, restore old one on destruction. -// -template -struct auto_restore -{ - auto_restore (T*& var, T* new_val = 0) - : var_ (var), old_val_ (var_) - { - if (new_val != 0) - var_ = new_val; - } - - void - set (T* new_val) {var_ = new_val;} - - ~auto_restore () {var_ = old_val_;} - -private: - T*& var_; - T* old_val_; -}; - - -void parser:: -recover (token& t) -{ - // Recover by skipping past next ';' or '}'. - // - for (;; t = lexer_->next ()) - { - if (t.type () == token::t_eos) - break; - - token::punctuation_type p (t.punctuation ()); - - if (p == token::p_semi || p == token::p_rcbrace) - { - t = lexer_->next (); - break; - } - } -} - -unique_ptr parser:: -parse (std::istream& is, path const& p) -{ - unique_ptr unit (new cli_unit (p, 1, 1)); - - { - path ap (p); - ap.absolute (); - ap.normalize (); - include_map_[ap] = unit.get (); - } - - root_ = cur_ = unit.get (); - - lexer l (is, p.string ()); - lexer_ = &l; - - doc_count_ = 0; - - path_ = &p; - valid_ = true; - - def_unit (); - - if (!valid_ || !l.valid ()) - throw invalid_input (); - - return unit; -} - -void parser:: -def_unit () -{ - token t (lexer_->next ()); - - // include-decl-seq - // - for (token::keyword_type k (t.keyword ()); - k == token::k_include || k == token::k_source; - k = t.keyword ()) - { - try - { - if (k == token::k_include) - include_decl (); - else - source_decl (); - - t = lexer_->next (); - } - catch (error const&) - { - valid_ = false; - recover (t); - } - } - - auto_restore new_scope (scope_, cur_); - - // decl-seq - // - while (t.type () != token::t_eos) - { - try - { - if (t.keyword () == token::k_source) - { - try - { - source_decl (); - t = lexer_->next (); - } - catch (error const&) - { - valid_ = false; - recover (t); - } - - continue; - } - - if (decl (t)) - { - t = lexer_->next (); - continue; - } - - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected namespace, class, or documentation instead of " - << t << endl; - throw error (); - } - catch (error const&) - { - valid_ = false; - break; // Non-recoverable error. - } - } -} - -void parser:: -source_decl () -{ - token t (lexer_->next ()); - - if (t.type () != token::t_cli_path_lit) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected cli path literal instead of " << t << endl; - throw error (); - } - - string const& l (t.literal ()); - bool q (l[0] == '"'); // Quote or braket include? - - path f; - try - { - f = path (string (l, 1, l.size () - 2)); - } - catch (const invalid_path& e) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "'" << e.path () << "' is not a valid filesystem path" << endl; - valid_ = false; - } - - if (valid_) - { - path p; - - // If this is a quote include, then include relative to the current - // file. - // - if (q) - { - p = path_->directory () / f; - p.normalize (); - } - // Otherwise search the include directories (-I). - // - else - { - for (paths::const_iterator i (include_paths_.begin ()); - i != include_paths_.end (); ++i) - { - p = *i / f; - p.normalize (); - - if (file_exists (p)) - break; - - p.clear (); - } - - if (p.empty ()) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": " - << "error: file '" << f << "' not found in any of the " - << "include search directories (-I)" << endl; - valid_ = false; - } - } - - if (valid_) - { - auto_restore new_path (path_, &p); - - ifstream ifs (p.string ().c_str ()); - if (ifs.is_open ()) - { - ifs.exceptions (ifstream::failbit | ifstream::badbit); - - try - { - lexer l (ifs, p.string ()); - auto_restore new_lexer (lexer_, &l); - - def_unit (); - - if (!l.valid ()) - valid_ = false; - } - catch (std::ios_base::failure const&) - { - cerr << p << ": error: read failure" << endl; - valid_ = false; - } - } - else - { - cerr << p << ": error: unable to open in read mode" << endl; - valid_ = false; - } - } - } - - t = lexer_->next (); - - if (t.punctuation () != token::p_semi) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected ';' instead of " << t << endl; - throw error (); - } -} - -void parser:: -include_decl () -{ - token t (lexer_->next ()); - token::token_type tt (t.type ()); - - if (tt != token::t_cxx_path_lit && tt != token::t_cli_path_lit) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected path literal instead of " << t << endl; - throw error (); - } - - string const& l (t.literal ()); - includes::kind_type ik (l[0] == '<' ? includes::bracket : includes::quote); - - path f; - try - { - f = path (string (l, 1, l.size () - 2)); - } - catch (const invalid_path& e) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "'" << e.path () << "' is not a valid filesystem path" << endl; - valid_ = false; - } - - if (valid_) - { - if (tt == token::t_cxx_path_lit) - { - cxx_unit& n ( - root_->new_node (*path_, t.line (), t.column ())); - root_->new_edge (*cur_, n, ik, f); - } - else - { - path p; - // If this is a quote include, then include relative to the current - // file. - // - if (ik == includes::quote) - { - p = path_->directory () / f; - p.normalize (); - } - // Otherwise search the include directories (-I). - // - else - { - for (paths::const_iterator i (include_paths_.begin ()); - i != include_paths_.end (); ++i) - { - p = *i / f; - p.normalize (); - - if (file_exists (p)) - break; - - p.clear (); - } - - if (p.empty ()) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": " - << "error: file '" << f << "' not found in any of the " - << "include search directories (-I)" << endl; - valid_ = false; - } - } - - if (valid_) - { - // Detect and ignore multiple inclusions. - // - path ap (p); - ap.absolute (); - ap.normalize (); - - include_map::iterator it (include_map_.find (ap)); - if (it == include_map_.end ()) - { - cli_unit& n (root_->new_node (p, 1, 1)); - root_->new_edge (*cur_, n, ik, f); - include_map_[ap] = &n; - - auto_restore new_cur (cur_, &n); - auto_restore new_path (path_, &p); - - ifstream ifs (p.string ().c_str ()); - if (ifs.is_open ()) - { - ifs.exceptions (ifstream::failbit | ifstream::badbit); - - try - { - lexer l (ifs, p.string ()); - auto_restore new_lexer (lexer_, &l); - - def_unit (); - - if (!l.valid ()) - valid_ = false; - } - catch (std::ios_base::failure const&) - { - cerr << p << ": error: read failure" << endl; - valid_ = false; - } - } - else - { - cerr << p << ": error: unable to open in read mode" << endl; - valid_ = false; - } - } - else - root_->new_edge (*cur_, *it->second, ik, f); - } - } - } - - t = lexer_->next (); - - if (t.punctuation () != token::p_semi) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected ';' instead of " << t << endl; - throw error (); - } -} - -bool parser:: -decl (token& t) -{ - switch (t.type ()) - { - case token::t_keyword: - { - switch (t.keyword ()) - { - case token::k_namespace: - { - namespace_def (); - return true; - } - case token::k_class: - { - class_def (); - return true; - } - default: - break; - } - - break; - } - case token::t_punctuation: - { - if (t.punctuation () != token::p_lcbrace) - break; - } - // Fall through. - case token::t_string_lit: - { - scope_doc (t); - return true; - } - default: - break; - } - - return false; -} - -void parser:: -scope_doc (token& t) -{ - size_t ln (t.line ()), cl (t.column ()); - - // Use a counter to give scope-level docs unique names. We use a - // single counter throughout all units/scope because we could be - // reopening namespaces. - // - if (t.type () == token::t_string_lit) - { - // string-literal - // - if (valid_) - { - // Enter each ""-enclosed string as a separate documentation - // entry, handle documentation variables. - // - const string& l (t.literal ()); - - char p ('\0'); - for (size_t b (0), e (1); e < l.size (); ++e) - { - if (l[e] == '"' && p != '\\') - { - string s (doc_string (l.c_str () + b, e - b + 1)); - - if (!s.empty ()) - { - doc& d (root_->new_node (*path_, ln, cl)); - - // See if this is a variable assignment: "\=". - // - size_t p (0); // '=' position. - if (s.size () >= 3 && s[0] == '\\' && s[1] != '\\') - { - for (p = 1; p != s.size (); ++p) - { - char c (s[p]); - - // Variable name should be a C identifier. - // - if (!(c == '_' || - ('a' <= c && c <= 'z') || - ('A' <= c && c <= 'Z') || - (p != 1 && '0' <= c && c <= '9'))) - break; - } - - if (p == s.size () || s[p] != '=' || p == 1) // Not a variable. - p = 0; - } - - if (p != 0) - { - root_->new_edge ( - *scope_, d, "var: " + string (s, 1, p - 1)); - s = string (s, p + 1); - } - else - { - ostringstream os; - os << "doc: " << doc_count_++; - root_->new_edge (*scope_, d, os.str ()); - } - - d.push_back (s); // move(). - } - - // If we have more, then make b point to the opening '"'. Second - // ++e in for() above will make e point to the character after it. - // - b = ++e; - continue; - } - - // We need to keep track of \\ escapings so we don't confuse - // them with \", as in \\". - // - if (l[e] == '\\' && p == '\\') - p = '\0'; - else - p = l[e]; - } - } - } - else - { - // doc-string-seq - // - assert (t.punctuation () == token::p_lcbrace); - - doc* d (0); - if (valid_) - { - ostringstream os; - os << "doc: " << doc_count_++; - - d = &root_->new_node (*path_, ln, cl); - root_->new_edge (*scope_, *d, os.str ()); - } - - for (t = lexer_->next ();; t = lexer_->next ()) - { - if (t.type () != token::t_string_lit) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected documentation string instead of " << t << endl; - throw error (); - } - - if (valid_) - d->push_back (doc_string (t.literal ().c_str (), - t.literal ().size ())); - - t = lexer_->next (); - - if (t.punctuation () != token::p_comma) - break; - } - - if (t.punctuation () != token::p_rcbrace) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected '}' instead of " << t << endl; - throw error (); - } - } -} - -void parser:: -namespace_def () -{ - token t (lexer_->next ()); - - if (t.type () != token::t_identifier) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected identifier instead of " << t << endl; - throw error (); - } - - auto_restore new_scope (scope_); - - if (valid_) - { - namespace_& n ( - root_->new_node (*path_, t.line (), t.column ())); - root_->new_edge (*scope_, n, t.identifier ()); - new_scope.set (&n); - } - - t = lexer_->next (); - - if (t.punctuation () != token::p_lcbrace) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected '{' instead of " << t << endl; - throw error (); - } - - // decl-seq - // - t = lexer_->next (); - - while (decl (t)) - t = lexer_->next (); - - if (t.punctuation () != token::p_rcbrace) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected namespace, class, documentation, or '}' instead of " - << t << endl; - throw error (); - } -} - -void parser:: -class_def () -{ - token t (lexer_->next ()); - - if (t.type () != token::t_identifier) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected identifier instead of " << t << endl; - throw error (); - } - - class_* n (0); - if (valid_) - { - n = &root_->new_node (*path_, t.line (), t.column ()); - root_->new_edge (*scope_, *n, t.identifier ()); - } - - t = lexer_->next (); - - // inheritance-spec - // - if (t.punctuation () == token::p_colon) - { - for (;;) - { - t = lexer_->next (); - size_t line (t.line ()), col (t.column ()); - - string name; - if (!qualified_name (t, name)) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected qualified name instead of " << t << endl; - throw error (); - } - - string ns; - - // If it is a fully-qualifed name, then start from the global namespace. - // Otherwise, from the current scope. - // - if (name[0] == ':') - name = string (name, 2, string::npos); - else - ns = scope_->fq_name (); - - if (class_* b = cur_->lookup (ns, name)) - root_->new_edge (*n, *b); - else - { - cerr << *path_ << ':' << line << ':' << col << ": error: " - << "unable to resolve base class '" << name << "'" << endl; - valid_ = false; - } - - if (t.punctuation () != token::p_comma) - break; - } - } - - // abstract-spec - // - if (t.punctuation () == token::p_eq) - { - t = lexer_->next (); - - if (t.type () != token::t_int_lit || t.literal () != "0") - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected '0' instead of " << t << endl; - throw error (); - } - - if (n != 0) - n->abstract (true); - - t = lexer_->next (); - } - - if (t.punctuation () != token::p_lcbrace) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected '{' instead of " << t << endl; - throw error (); - } - - auto_restore new_scope (scope_, n); - - // class-decl-seq - // - t = lexer_->next (); - - for (;;) - { - try - { - if (t.type () == token::t_string_lit || - t.punctuation () == token::p_lcbrace) - { - scope_doc (t); - t = lexer_->next (); - } - else - { - if (!option_def (t)) - break; - } - } - catch (error const&) - { - valid_ = false; - recover (t); - } - } - - if (t.punctuation () != token::p_rcbrace) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected option, documentation, or '}' instead of " << t << endl; - throw error (); - } - - t = lexer_->next (); - - if (t.punctuation () != token::p_semi) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected ';' instead of " << t << endl; - throw error (); - } -} - -bool parser:: -option_def (token& t) -{ - size_t l (t.line ()), c (t.column ()); - - // type-spec - // - // These two functions set t to the next token if they return - // true. - // - string type_name; - - if (!qualified_name (t, type_name) && !fundamental_type (t, type_name)) - return false; - - option* o (0); - - if (valid_) - { - o = &root_->new_node