// file      : cli/parser.cxx
// author    : Boris Kolpackov
// copyright : Copyright (c) 2009-2011 Code Synthesis Tools CC
// license   : MIT; see accompanying LICENSE file

#include <sys/types.h> // stat
#include <sys/stat.h>  // stat
#include <unistd.h>    // stat

#include <fstream>
#include <iostream>

#include "token.hxx"
#include "lexer.hxx"
#include "parser.hxx"

#include "semantics.hxx"

using namespace std;
using namespace semantics;

const char* keywords[] =
{
  "include",
  "namespace",
  "class",
  "signed",
  "unsigned",
  "bool",
  "char",
  "wchar_t",
  "short",
  "int",
  "long",
  "float",
  "double"
};

const char* punctuation[] = {";", ",", ":", "::", "{", "}", /*"(", ")",*/ "=", "|"};

// Output the token type and value in a format suitable for diagnostics.
//
std::ostream&
operator<< (std::ostream& os, token const& t)
{
  switch (t.type ())
  {
  case token::t_eos:
    {
      os << "end-of-stream";
      break;
    }
  case token::t_keyword:
    {
      os << "keyword '" << keywords[t.keyword ()] << "'";
      break;
    }
  case token::t_identifier:
    {
      os << "identifier '" << t.identifier () << "'";
      break;
    }
  case token::t_punctuation:
    {
      os << "'" << punctuation[t.punctuation ()] << "'";
      break;
    }
  case token::t_cxx_path_lit:
    {
      os << "c++ path literal";
      break;
    }
  case token::t_cli_path_lit:
    {
      os << "cli path literal";
      break;
    }
  case token::t_string_lit:
    {
      os << "string literal";
      break;
    }
  case token::t_char_lit:
    {
      os << "char literal";
      break;
    }
  case token::t_bool_lit:
    {
      os << "bool literal";
      break;
    }
  case token::t_int_lit:
    {
      os << "integer literal";
      break;
    }
  case token::t_float_lit:
    {
      os << "floating point literal";
      break;
    }
  case token::t_call_expr:
    {
      os << "call expression";
      break;
    }
  case token::t_template_expr:
    {
      os << "template expression";
      break;
    }
  }

  return os;
}

// RAII-style set new value on construction, restore old one on destruction.
//
template <typename T>
struct auto_restore
{
  auto_restore (T*& var, T* new_val = 0)
      : var_ (var), old_val_ (var_)
  {
    if (new_val != 0)
      var_ = new_val;
  }

  void
  set (T* new_val) {var_ = new_val;}

  ~auto_restore () {var_ = old_val_;}

private:
  T*& var_;
  T* old_val_;
};

void parser::
recover (token& t)
{
  // Recover by skipping past the next ';'.
  //
  for (;; t = lexer_->next ())
  {
    if (t.type () == token::t_eos)
      break;

    if (t.punctuation () == token::p_semi)
    {
      t = lexer_->next ();
      break;
    }
  }
}

auto_ptr<cli_unit> parser::
parse (std::istream& is, path const& p)
{
  auto_ptr<cli_unit> unit (new cli_unit (p, 1, 1));

  {
    path ap (p);
    ap.absolute ();
    ap.normalize ();
    include_map_[ap] = unit.get ();
  }

  root_ = cur_ = unit.get ();

  lexer l (is, p.string ());
  lexer_ = &l;

  path_ = &p;
  valid_ = true;

  def_unit ();

  if (!valid_ || !l.valid ())
    throw invalid_input ();

  return unit;
}

void parser::
def_unit ()
{
  token t (lexer_->next ());

  // include-decl-seq
  //
  while (t.keyword () == token::k_include)
  {
    try
    {
      include_decl ();
      t = lexer_->next ();
    }
    catch (error const&)
    {
      valid_ = false;
      recover (t);
    }
  }

  auto_restore<scope> new_scope (scope_, cur_);

  // decl-seq
  //
  while (t.type () != token::t_eos)
  {
    try
    {
      if (decl (t))
      {
        t = lexer_->next ();
        continue;
      }

      cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: "
           << "expected namespace or class declaration instead of " << t
           << endl;
      throw error ();
    }
    catch (error const&)
    {
      valid_ = false;
      break; // Non-recoverable error.
    }
  }
}
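// Parse an include declaration: a C++ or CLI path literal followed by ';'
// (the 'include' keyword itself is consumed by def_unit() above). A C++
// path merely gets a cxx_unit node recorded for it. A CLI path is located
// (relative to the including file for quote includes, or via the -I search
// directories for bracket includes), opened, and parsed recursively, with
// repeated inclusions of the same file detected through include_map_.
// Illustrative input (the file names below are made up for the example):
//
//   include <vector>;
//   include "types.cli";
//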
void parser::
include_decl ()
{
  token t (lexer_->next ());
  token::token_type tt (t.type ());

  if (tt != token::t_cxx_path_lit && tt != token::t_cli_path_lit)
  {
    cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: "
         << "expected path literal instead of " << t << endl;
    throw error ();
  }

  string const& l (t.literal ());
  includes::kind_type ik (l[0] == '<' ? includes::bracket : includes::quote);

  path f;
  try
  {
    f = path (string (l, 1, l.size () - 2));
  }
  catch (const invalid_path& e)
  {
    cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: "
         << "'" << e.path () << "' is not a valid filesystem path" << endl;
    valid_ = false;
  }

  if (valid_)
  {
    if (tt == token::t_cxx_path_lit)
    {
      cxx_unit& n (
        root_->new_node<cxx_unit> (*path_, t.line (), t.column ()));
      root_->new_edge<cxx_includes> (*cur_, n, ik, f);
    }
    else
    {
      path p;

      // If this is a quote include, then include relative to the current
      // file.
      //
      if (ik == includes::quote)
      {
        p = path_->directory () / f;
        p.normalize ();
      }
      // Otherwise search the include directories (-I).
      //
      else
      {
        struct stat s;
        for (paths::const_iterator i (include_paths_.begin ());
             i != include_paths_.end (); ++i)
        {
          p = *i / f;
          p.normalize ();

          // Check that the file exists without checking for permissions, etc.
          //
          if (stat (p.string ().c_str (), &s) == 0 && S_ISREG (s.st_mode))
            break;

          p.clear ();
        }

        if (p.empty ())
        {
          cerr << *path_ << ':' << t.line () << ':' << t.column () << ": "
               << "error: file '" << f << "' not found in any of the "
               << "include search directories (-I)" << endl;
          valid_ = false;
        }
      }

      if (valid_)
      {
        // Detect and ignore multiple inclusions.
        //
        path ap (p);
        ap.absolute ();
        ap.normalize ();

        include_map::iterator it (include_map_.find (ap));
        if (it == include_map_.end ())
        {
          cli_unit& n (root_->new_node<cli_unit> (p, 1, 1));
          root_->new_edge<cli_includes> (*cur_, n, ik, f);
          include_map_[ap] = &n;

          auto_restore<cli_unit> new_cur (cur_, &n);
          auto_restore<path const> new_path (path_, &p);

          ifstream ifs (p.string ().c_str ());
          if (ifs.is_open ())
          {
            ifs.exceptions (ifstream::failbit | ifstream::badbit);

            try
            {
              lexer l (ifs, p.string ());
              auto_restore<lexer> new_lexer (lexer_, &l);

              def_unit ();

              if (!l.valid ())
                valid_ = false;
            }
            catch (std::ios_base::failure const&)
            {
              cerr << p << ": error: read failure" << endl;
              valid_ = false;
            }
          }
          else
          {
            cerr << p << ": error: unable to open in read mode" << endl;
            valid_ = false;
          }
        }
        else
          root_->new_edge<cli_includes> (*cur_, *it->second, ik, f);
      }
    }
  }

  t = lexer_->next ();

  if (t.punctuation () != token::p_semi)
  {
    cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: "
         << "expected ';' instead of " << t << endl;
    throw error ();
  }
}

bool parser::
decl (token& t)
{
  if (t.type () == token::t_keyword)
  {
    switch (t.keyword ())
    {
    case token::k_namespace:
      {
        namespace_def ();
        return true;
      }
    case token::k_class:
      {
        class_def ();
        return true;
      }
    default:
      break;
    }
  }

  return false;
}

void parser::
namespace_def ()
{
  token t (lexer_->next ());

  if (t.type () != token::t_identifier)
  {
    cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: "
         << "expected identifier instead of " << t << endl;
    throw error ();
  }

  auto_restore<scope> new_scope (scope_);

  if (valid_)
  {
    namespace_& n (
      root_->new_node<namespace_> (*path_, t.line (), t.column ()));
    root_->new_edge<names> (*scope_, n, t.identifier ());
    new_scope.set (&n);
  }

  t = lexer_->next ();

  if (t.punctuation () != token::p_lcbrace)
  {
    cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: "
         << "expected '{' instead of " << t << endl;
    throw error ();
  }

  // decl-seq
  //
  t = lexer_->next ();

  while (decl (t))
    t = lexer_->next ();

  if (t.punctuation () != token::p_rcbrace)
  {
    cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: "
         << "expected namespace declaration, class declaration, or '}' "
         << "instead of " << t << endl;
    throw error ();
  }
}
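// Parse a class definition:
//
//   class name [: base, base, ...] [= 0] { option-def-seq };
//
// The optional '= 0' marker makes the class abstract. A base name that
// starts with '::' is resolved from the global namespace; otherwise it is
// resolved from the current scope. Illustrative input (the names below are
// made up for the example):
//
//   class options: ::common::base_options = 0
//   {
//     ...
//   };
//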
void parser::
class_def ()
{
  token t (lexer_->next ());

  if (t.type () != token::t_identifier)
  {
    cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: "
         << "expected identifier instead of " << t << endl;
    throw error ();
  }

  class_* n (0);
  if (valid_)
  {
    n = &root_->new_node<class_> (*path_, t.line (), t.column ());
    root_->new_edge<names> (*scope_, *n, t.identifier ());
  }

  t = lexer_->next ();

  // inheritance-spec
  //
  if (t.punctuation () == token::p_colon)
  {
    for (;;)
    {
      t = lexer_->next ();

      size_t line (t.line ()), col (t.column ());

      string name;
      if (!qualified_name (t, name))
      {
        cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: "
             << "expected qualified name instead of " << t << endl;
        throw error ();
      }

      string ns;

      // If it is a fully-qualified name, then start from the global
      // namespace. Otherwise, from the current scope.
      //
      if (name[0] == ':')
        name = string (name, 2, string::npos);
      else
        ns = scope_->fq_name ();

      if (class_* b = cur_->lookup<class_> (ns, name))
        root_->new_edge<inherits> (*n, *b);
      else
      {
        cerr << *path_ << ':' << line << ':' << col << ": error: "
             << "unable to resolve base class '" << name << "'" << endl;
        valid_ = false;
      }

      if (t.punctuation () != token::p_comma)
        break;
    }
  }

  // abstract-spec
  //
  if (t.punctuation () == token::p_eq)
  {
    t = lexer_->next ();

    if (t.type () != token::t_int_lit || t.literal () != "0")
    {
      cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: "
           << "expected '0' instead of " << t << endl;
      throw error ();
    }

    if (n != 0)
      n->abstract (true);

    t = lexer_->next ();
  }

  if (t.punctuation () != token::p_lcbrace)
  {
    cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: "
         << "expected '{' instead of " << t << endl;
    throw error ();
  }

  auto_restore<scope> new_scope (scope_, n);

  // decl-seq
  //
  t = lexer_->next ();

  for (;;)
  {
    try
    {
      if (!option_def (t))
        break;

      t = lexer_->next ();
    }
    catch (error const&)
    {
      valid_ = false;
      recover (t);
    }
  }

  if (t.punctuation () != token::p_rcbrace)
  {
    cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: "
         << "expected option declaration or '}' instead of " << t << endl;
    throw error ();
  }

  t = lexer_->next ();

  if (t.punctuation () != token::p_semi)
  {
    cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: "
         << "expected ';' instead of " << t << endl;
    throw error ();
  }
}

bool parser::
option_def (token& t)
{
  size_t l (t.line ()), c (t.column ());

  // type-spec
  //
  // These two functions set t to the next token if they return
  // true.
  //
  string type_name;

  if (!qualified_name (t, type_name) && !fundamental_type (t, type_name))
    return false;

  option* o (0);
  if (valid_)
  {
    o = &root_->new_node