// file : cli/parser.cxx // author : Boris Kolpackov // copyright : Copyright (c) 2009-2018 Code Synthesis Tools CC // license : MIT; see accompanying LICENSE file #ifndef _WIN32 # include // stat # include // stat # include // stat #else # include // _stat # include // _stat(), S_I* # ifdef _MSC_VER // Unlikely to be fixed in newer versions. # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) # endif #endif #include #include #include #include #include #include #include using namespace std; using namespace semantics; // Check that the file exist without checking for permissions, etc. // inline static bool file_exists (const path& p) { #ifndef _WIN32 struct stat s; int r (stat (p.string ().c_str (), &s)); #else struct _stat s; int r (_stat (p.string ().c_str (), &s)); #endif return r == 0 && S_ISREG (s.st_mode); } const char* keywords[] = { "include", "namespace", "class", "signed", "unsigned", "bool", "char", "wchar_t", "short", "int", "long", "float", "double" }; const char* punctuation[] = { ";", ",", ":", "::", "{", "}", /*"(", ")",*/ "=", "|"}; // Output the token type and value in a format suitable for diagnostics. // std::ostream& operator<< (std::ostream& os, token const& t) { switch (t.type ()) { case token::t_eos: { os << "end-of-stream"; break; } case token::t_keyword: { os << "keyword '" << keywords[t.keyword ()] << "'"; break; } case token::t_identifier: { os << "identifier '" << t.identifier () << "'"; break; } case token::t_punctuation: { os << "'" << punctuation[t.punctuation ()] << "'"; break; } case token::t_cxx_path_lit: { os << "c++ path literal"; break; } case token::t_cli_path_lit: { os << "cli path literal"; break; } case token::t_string_lit: { os << "string literal"; break; } case token::t_char_lit: { os << "char literal"; break; } case token::t_bool_lit: { os << "bool literal"; break; } case token::t_int_lit: { os << "integer literal"; break; } case token::t_float_lit: { os << "floating point literal"; break; } case token::t_call_expr: { os << "call expression"; break; } case token::t_template_expr: { os << "template expression"; break; } } return os; } // RAII-style set new value on construction, restore old one on destruction. // template struct auto_restore { auto_restore (T*& var, T* new_val = 0) : var_ (var), old_val_ (var_) { if (new_val != 0) var_ = new_val; } void set (T* new_val) {var_ = new_val;} ~auto_restore () {var_ = old_val_;} private: T*& var_; T* old_val_; }; void parser:: recover (token& t) { // Recover by skipping past next ';' or '}'. // for (;; t = lexer_->next ()) { if (t.type () == token::t_eos) break; token::punctuation_type p (t.punctuation ()); if (p == token::p_semi || p == token::p_rcbrace) { t = lexer_->next (); break; } } } unique_ptr parser:: parse (std::istream& is, path const& p) { unique_ptr unit (new cli_unit (p, 1, 1)); { path ap (p); ap.absolute (); ap.normalize (); include_map_[ap] = unit.get (); } root_ = cur_ = unit.get (); lexer l (is, p.string ()); lexer_ = &l; doc_count_ = 0; path_ = &p; valid_ = true; def_unit (); if (!valid_ || !l.valid ()) throw invalid_input (); return unit; } void parser:: def_unit () { token t (lexer_->next ()); // include-decl-seq // for (token::keyword_type k (t.keyword ()); k == token::k_include || k == token::k_source; k = t.keyword ()) { try { if (k == token::k_include) include_decl (); else source_decl (); t = lexer_->next (); } catch (error const&) { valid_ = false; recover (t); } } auto_restore new_scope (scope_, cur_); // decl-seq // while (t.type () != token::t_eos) { try { if (t.keyword () == token::k_source) { try { source_decl (); t = lexer_->next (); } catch (error const&) { valid_ = false; recover (t); } continue; } if (decl (t)) { t = lexer_->next (); continue; } cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " << "expected namespace, class, or documentation instead of " << t << endl; throw error (); } catch (error const&) { valid_ = false; break; // Non-recoverable error. } } } void parser:: source_decl () { token t (lexer_->next ()); if (t.type () != token::t_cli_path_lit) { cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " << "expected cli path literal instead of " << t << endl; throw error (); } string const& l (t.literal ()); bool q (l[0] == '"'); // Quote or braket include? path f; try { f = path (string (l, 1, l.size () - 2)); } catch (const invalid_path& e) { cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " << "'" << e.path () << "' is not a valid filesystem path" << endl; valid_ = false; } if (valid_) { path p; // If this is a quote include, then include relative to the current // file. // if (q) { p = path_->directory () / f; p.normalize (); } // Otherwise search the include directories (-I). // else { for (paths::const_iterator i (include_paths_.begin ()); i != include_paths_.end (); ++i) { p = *i / f; p.normalize (); if (file_exists (p)) break; p.clear (); } if (p.empty ()) { cerr << *path_ << ':' << t.line () << ':' << t.column () << ": " << "error: file '" << f << "' not found in any of the " << "include search directories (-I)" << endl; valid_ = false; } } if (valid_) { auto_restore new_path (path_, &p); ifstream ifs (p.string ().c_str ()); if (ifs.is_open ()) { ifs.exceptions (ifstream::failbit | ifstream::badbit); try { lexer l (ifs, p.string ()); auto_restore new_lexer (lexer_, &l); def_unit (); if (!l.valid ()) valid_ = false; } catch (std::ios_base::failure const&) { cerr << p << ": error: read failure" << endl; valid_ = false; } } else { cerr << p << ": error: unable to open in read mode" << endl; valid_ = false; } } } t = lexer_->next (); if (t.punctuation () != token::p_semi) { cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " << "expected ';' instead of " << t << endl; throw error (); } } void parser:: include_decl () { token t (lexer_->next ()); token::token_type tt (t.type ()); if (tt != token::t_cxx_path_lit && tt != token::t_cli_path_lit) { cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " << "expected path literal instead of " << t << endl; throw error (); } string const& l (t.literal ()); includes::kind_type ik (l[0] == '<' ? includes::bracket : includes::quote); path f; try { f = path (string (l, 1, l.size () - 2)); } catch (const invalid_path& e) { cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " << "'" << e.path () << "' is not a valid filesystem path" << endl; valid_ = false; } if (valid_) { if (tt == token::t_cxx_path_lit) { cxx_unit& n ( root_->new_node (*path_, t.line (), t.column ())); root_->new_edge (*cur_, n, ik, f); } else { path p; // If this is a quote include, then include relative to the current // file. // if (ik == includes::quote) { p = path_->directory () / f; p.normalize (); } // Otherwise search the include directories (-I). // else { for (paths::const_iterator i (include_paths_.begin ()); i != include_paths_.end (); ++i) { p = *i / f; p.normalize (); if (file_exists (p)) break; p.clear (); } if (p.empty ()) { cerr << *path_ << ':' << t.line () << ':' << t.column () << ": " << "error: file '" << f << "' not found in any of the " << "include search directories (-I)" << endl; valid_ = false; } } if (valid_) { // Detect and ignore multiple inclusions. // path ap (p); ap.absolute (); ap.normalize (); include_map::iterator it (include_map_.find (ap)); if (it == include_map_.end ()) { cli_unit& n (root_->new_node (p, 1, 1)); root_->new_edge (*cur_, n, ik, f); include_map_[ap] = &n; auto_restore new_cur (cur_, &n); auto_restore new_path (path_, &p); ifstream ifs (p.string ().c_str ()); if (ifs.is_open ()) { ifs.exceptions (ifstream::failbit | ifstream::badbit); try { lexer l (ifs, p.string ()); auto_restore new_lexer (lexer_, &l); def_unit (); if (!l.valid ()) valid_ = false; } catch (std::ios_base::failure const&) { cerr << p << ": error: read failure" << endl; valid_ = false; } } else { cerr << p << ": error: unable to open in read mode" << endl; valid_ = false; } } else root_->new_edge (*cur_, *it->second, ik, f); } } } t = lexer_->next (); if (t.punctuation () != token::p_semi) { cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " << "expected ';' instead of " << t << endl; throw error (); } } bool parser:: decl (token& t) { switch (t.type ()) { case token::t_keyword: { switch (t.keyword ()) { case token::k_namespace: { namespace_def (); return true; } case token::k_class: { class_def (); return true; } default: break; } break; } case token::t_punctuation: { if (t.punctuation () != token::p_lcbrace) break; } // Fall through. case token::t_string_lit: { scope_doc (t); return true; } default: break; } return false; } void parser:: scope_doc (token& t) { size_t ln (t.line ()), cl (t.column ()); // Use a counter to give scope-level docs unique names. We use a // single counter throughout all units/scope because we could be // reopening namespaces. // if (t.type () == token::t_string_lit) { // string-literal // if (valid_) { // Enter each ""-enclosed string as a separate documentation // entry, handle documentation variables. // const string& l (t.literal ()); char p ('\0'); for (size_t b (0), e (1); e < l.size (); ++e) { if (l[e] == '"' && p != '\\') { string s (doc_string (l.c_str () + b, e - b + 1)); if (!s.empty ()) { doc& d (root_->new_node (*path_, ln, cl)); // See if this is a variable assignment: "\=". // size_t p (0); // '=' position. if (s.size () >= 3 && s[0] == '\\' && s[1] != '\\') { for (p = 1; p != s.size (); ++p) { char c (s[p]); // Variable name should be a C identifier. // if (!(c == '_' || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || (p != 1 && '0' <= c && c <= '9'))) break; } if (p == s.size () || s[p] != '=' || p == 1) // Not a variable. p = 0; } if (p != 0) { root_->new_edge ( *scope_, d, "var: " + string (s, 1, p - 1)); s = string (s, p + 1); } else { ostringstream os; os << "doc: " << doc_count_++; root_->new_edge (*scope_, d, os.str ()); } d.push_back (s); // move(). } // If we have more, then make b point to the opening '"'. Second // ++e in for() above will make e point to the character after it. // b = ++e; continue; } // We need to keep track of \\ escapings so we don't confuse // them with \", as in \\". // if (l[e] == '\\' && p == '\\') p = '\0'; else p = l[e]; } } } else { // doc-string-seq // assert (t.punctuation () == token::p_lcbrace); doc* d (0); if (valid_) { ostringstream os; os << "doc: " << doc_count_++; d = &root_->new_node (*path_, ln, cl); root_->new_edge (*scope_, *d, os.str ()); } for (t = lexer_->next ();; t = lexer_->next ()) { if (t.type () != token::t_string_lit) { cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " << "expected documentation string instead of " << t << endl; throw error (); } if (valid_) d->push_back (doc_string (t.literal ().c_str (), t.literal ().size ())); t = lexer_->next (); if (t.punctuation () != token::p_comma) break; } if (t.punctuation () != token::p_rcbrace) { cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " << "expected '}' instead of " << t << endl; throw error (); } } } void parser:: namespace_def () { token t (lexer_->next ()); if (t.type () != token::t_identifier) { cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " << "expected identifier instead of " << t << endl; throw error (); } auto_restore new_scope (scope_); if (valid_) { namespace_& n ( root_->new_node (*path_, t.line (), t.column ())); root_->new_edge (*scope_, n, t.identifier ()); new_scope.set (&n); } t = lexer_->next (); if (t.punctuation () != token::p_lcbrace) { cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " << "expected '{' instead of " << t << endl; throw error (); } // decl-seq // t = lexer_->next (); while (decl (t)) t = lexer_->next (); if (t.punctuation () != token::p_rcbrace) { cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " << "expected namespace, class, documentation, or '}' instead of " << t << endl; throw error (); } } void parser:: class_def () { token t (lexer_->next ()); if (t.type () != token::t_identifier) { cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " << "expected identifier instead of " << t << endl; throw error (); } class_* n (0); if (valid_) { n = &root_->new_node (*path_, t.line (), t.column ()); root_->new_edge (*scope_, *n, t.identifier ()); } t = lexer_->next (); // inheritance-spec // if (t.punctuation () == token::p_colon) { for (;;) { t = lexer_->next (); size_t line (t.line ()), col (t.column ()); string name; if (!qualified_name (t, name)) { cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " << "expected qualified name instead of " << t << endl; throw error (); } string ns; // If it is a fully-qualifed name, then start from the global namespace. // Otherwise, from the current scope. // if (name[0] == ':') name = string (name, 2, string::npos); else ns = scope_->fq_name (); if (class_* b = cur_->lookup (ns, name)) root_->new_edge (*n, *b); else { cerr << *path_ << ':' << line << ':' << col << ": error: " << "unable to resolve base class '" << name << "'" << endl; valid_ = false; } if (t.punctuation () != token::p_comma) break; } } // abstract-spec // if (t.punctuation () == token::p_eq) { t = lexer_->next (); if (t.type () != token::t_int_lit || t.literal () != "0") { cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " << "expected '0' instead of " << t << endl; throw error (); } if (n != 0) n->abstract (true); t = lexer_->next (); } if (t.punctuation () != token::p_lcbrace) { cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " << "expected '{' instead of " << t << endl; throw error (); } auto_restore new_scope (scope_, n); // class-decl-seq // t = lexer_->next (); for (;;) { try { if (t.type () == token::t_string_lit || t.punctuation () == token::p_lcbrace) { scope_doc (t); t = lexer_->next (); } else { if (!option_def (t)) break; } } catch (error const&) { valid_ = false; recover (t); } } if (t.punctuation () != token::p_rcbrace) { cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " << "expected option, documentation, or '}' instead of " << t << endl; throw error (); } t = lexer_->next (); if (t.punctuation () != token::p_semi) { cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " << "expected ';' instead of " << t << endl; throw error (); } } bool parser:: option_def (token& t) { size_t l (t.line ()), c (t.column ()); // type-spec // // These two functions set t to the next token if they return // true. // string type_name; if (!qualified_name (t, type_name) && !fundamental_type (t, type_name)) return false; option* o (0); if (valid_) { o = &root_->new_node