diff options
Diffstat (limited to 'cli/cli/parser.cxx')
-rw-r--r-- | cli/cli/parser.cxx | 1728 |
1 file changed, 1728 insertions, 0 deletions
diff --git a/cli/cli/parser.cxx b/cli/cli/parser.cxx new file mode 100644 index 0000000..4685edc --- /dev/null +++ b/cli/cli/parser.cxx @@ -0,0 +1,1728 @@ +// file : cli/parser.cxx +// author : Boris Kolpackov <boris@codesynthesis.com> +// license : MIT; see accompanying LICENSE file + +#ifndef _WIN32 +# include <unistd.h> // stat +# include <sys/types.h> // stat +# include <sys/stat.h> // stat +#else +# include <sys/types.h> // _stat +# include <sys/stat.h> // _stat(), S_I* + +# ifdef _MSC_VER // Unlikely to be fixed in newer versions. +# define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +# endif +#endif + +#include <fstream> +#include <sstream> +#include <iostream> + +#include <cli/token.hxx> +#include <cli/lexer.hxx> +#include <cli/parser.hxx> + +#include <cli/semantics.hxx> + +using namespace std; +using namespace semantics; + +// Check that the file exist without checking for permissions, etc. +// +inline static bool +file_exists (const path& p) +{ +#ifndef _WIN32 + struct stat s; + int r (stat (p.string ().c_str (), &s)); +#else + struct _stat s; + int r (_stat (p.string ().c_str (), &s)); +#endif + + return r == 0 && S_ISREG (s.st_mode); +} + +const char* keywords[] = +{ + "include", + "namespace", + "class", + "signed", + "unsigned", + "bool", + "char", + "wchar_t", + "short", + "int", + "long", + "float", + "double" +}; + +const char* punctuation[] = { + ";", ",", ":", "::", "{", "}", /*"(", ")",*/ "=", "|"}; + +// Output the token type and value in a format suitable for diagnostics. 
+// +std::ostream& +operator<< (std::ostream& os, token const& t) +{ + switch (t.type ()) + { + case token::t_eos: + { + os << "end-of-stream"; + break; + } + case token::t_keyword: + { + os << "keyword '" << keywords[t.keyword ()] << "'"; + break; + } + case token::t_identifier: + { + os << "identifier '" << t.identifier () << "'"; + break; + } + case token::t_punctuation: + { + os << "'" << punctuation[t.punctuation ()] << "'"; + break; + } + case token::t_cxx_path_lit: + { + os << "c++ path literal"; + break; + } + case token::t_cli_path_lit: + { + os << "cli path literal"; + break; + } + case token::t_string_lit: + { + os << "string literal"; + break; + } + case token::t_char_lit: + { + os << "char literal"; + break; + } + case token::t_bool_lit: + { + os << "bool literal"; + break; + } + case token::t_int_lit: + { + os << "integer literal"; + break; + } + case token::t_float_lit: + { + os << "floating point literal"; + break; + } + case token::t_call_expr: + { + os << "call expression"; + break; + } + case token::t_template_expr: + { + os << "template expression"; + break; + } + } + + return os; +} + +// RAII-style set new value on construction, restore old one on destruction. +// +template <typename T> +struct auto_restore +{ + auto_restore (T*& var, T* new_val = 0) + : var_ (var), old_val_ (var_) + { + if (new_val != 0) + var_ = new_val; + } + + void + set (T* new_val) {var_ = new_val;} + + ~auto_restore () {var_ = old_val_;} + +private: + T*& var_; + T* old_val_; +}; + + +void parser:: +recover (token& t) +{ + // Recover by skipping past next ';' or '}'. 
+ // + for (;; t = lexer_->next ()) + { + if (t.type () == token::t_eos) + break; + + token::punctuation_type p (t.punctuation ()); + + if (p == token::p_semi || p == token::p_rcbrace) + { + t = lexer_->next (); + break; + } + } +} + +unique_ptr<cli_unit> parser:: +parse (std::istream& is, path const& p) +{ + unique_ptr<cli_unit> unit (new cli_unit (p, 1, 1)); + + { + path ap (p); + ap.absolute (); + ap.normalize (); + include_map_[ap] = unit.get (); + } + + root_ = cur_ = unit.get (); + + lexer l (is, p.string ()); + lexer_ = &l; + + doc_count_ = 0; + + path_ = &p; + valid_ = true; + + def_unit (); + + if (!valid_ || !l.valid ()) + throw invalid_input (); + + return unit; +} + +void parser:: +def_unit () +{ + token t (lexer_->next ()); + + // include-decl-seq + // + for (token::keyword_type k (t.keyword ()); + k == token::k_include || k == token::k_source; + k = t.keyword ()) + { + try + { + if (k == token::k_include) + include_decl (); + else + source_decl (); + + t = lexer_->next (); + } + catch (error const&) + { + valid_ = false; + recover (t); + } + } + + auto_restore<scope> new_scope (scope_, cur_); + + // decl-seq + // + while (t.type () != token::t_eos) + { + try + { + if (t.keyword () == token::k_source) + { + try + { + source_decl (); + t = lexer_->next (); + } + catch (error const&) + { + valid_ = false; + recover (t); + } + + continue; + } + + if (decl (t)) + { + t = lexer_->next (); + continue; + } + + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected namespace, class, or documentation instead of " + << t << endl; + throw error (); + } + catch (error const&) + { + valid_ = false; + break; // Non-recoverable error. 
+ } + } +} + +void parser:: +source_decl () +{ + token t (lexer_->next ()); + + if (t.type () != token::t_cli_path_lit) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected cli path literal instead of " << t << endl; + throw error (); + } + + string const& l (t.literal ()); + bool q (l[0] == '"'); // Quote or braket include? + + path f; + try + { + f = path (string (l, 1, l.size () - 2)); + } + catch (const invalid_path& e) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "'" << e.path () << "' is not a valid filesystem path" << endl; + valid_ = false; + } + + if (valid_) + { + path p; + + // If this is a quote include, then include relative to the current + // file. + // + if (q) + { + p = path_->directory () / f; + p.normalize (); + } + // Otherwise search the include directories (-I). + // + else + { + for (paths::const_iterator i (include_paths_.begin ()); + i != include_paths_.end (); ++i) + { + p = *i / f; + p.normalize (); + + if (file_exists (p)) + break; + + p.clear (); + } + + if (p.empty ()) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": " + << "error: file '" << f << "' not found in any of the " + << "include search directories (-I)" << endl; + valid_ = false; + } + } + + if (valid_) + { + auto_restore<path const> new_path (path_, &p); + + ifstream ifs (p.string ().c_str ()); + if (ifs.is_open ()) + { + ifs.exceptions (ifstream::failbit | ifstream::badbit); + + try + { + lexer l (ifs, p.string ()); + auto_restore<lexer> new_lexer (lexer_, &l); + + def_unit (); + + if (!l.valid ()) + valid_ = false; + } + catch (std::ios_base::failure const&) + { + cerr << p << ": error: read failure" << endl; + valid_ = false; + } + } + else + { + cerr << p << ": error: unable to open in read mode" << endl; + valid_ = false; + } + } + } + + t = lexer_->next (); + + if (t.punctuation () != token::p_semi) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": 
error: " + << "expected ';' instead of " << t << endl; + throw error (); + } +} + +void parser:: +include_decl () +{ + token t (lexer_->next ()); + token::token_type tt (t.type ()); + + if (tt != token::t_cxx_path_lit && tt != token::t_cli_path_lit) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected path literal instead of " << t << endl; + throw error (); + } + + string const& l (t.literal ()); + includes::kind_type ik (l[0] == '<' ? includes::bracket : includes::quote); + + path f; + try + { + f = path (string (l, 1, l.size () - 2)); + } + catch (const invalid_path& e) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "'" << e.path () << "' is not a valid filesystem path" << endl; + valid_ = false; + } + + if (valid_) + { + if (tt == token::t_cxx_path_lit) + { + cxx_unit& n ( + root_->new_node<cxx_unit> (*path_, t.line (), t.column ())); + root_->new_edge<cxx_includes> (*cur_, n, ik, f); + } + else + { + path p; + // If this is a quote include, then include relative to the current + // file. + // + if (ik == includes::quote) + { + p = path_->directory () / f; + p.normalize (); + } + // Otherwise search the include directories (-I). + // + else + { + for (paths::const_iterator i (include_paths_.begin ()); + i != include_paths_.end (); ++i) + { + p = *i / f; + p.normalize (); + + if (file_exists (p)) + break; + + p.clear (); + } + + if (p.empty ()) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": " + << "error: file '" << f << "' not found in any of the " + << "include search directories (-I)" << endl; + valid_ = false; + } + } + + if (valid_) + { + // Detect and ignore multiple inclusions. 
+ // + path ap (p); + ap.absolute (); + ap.normalize (); + + include_map::iterator it (include_map_.find (ap)); + if (it == include_map_.end ()) + { + cli_unit& n (root_->new_node<cli_unit> (p, 1, 1)); + root_->new_edge<cli_includes> (*cur_, n, ik, f); + include_map_[ap] = &n; + + auto_restore<cli_unit> new_cur (cur_, &n); + auto_restore<path const> new_path (path_, &p); + + ifstream ifs (p.string ().c_str ()); + if (ifs.is_open ()) + { + ifs.exceptions (ifstream::failbit | ifstream::badbit); + + try + { + lexer l (ifs, p.string ()); + auto_restore<lexer> new_lexer (lexer_, &l); + + def_unit (); + + if (!l.valid ()) + valid_ = false; + } + catch (std::ios_base::failure const&) + { + cerr << p << ": error: read failure" << endl; + valid_ = false; + } + } + else + { + cerr << p << ": error: unable to open in read mode" << endl; + valid_ = false; + } + } + else + root_->new_edge<cli_includes> (*cur_, *it->second, ik, f); + } + } + } + + t = lexer_->next (); + + if (t.punctuation () != token::p_semi) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected ';' instead of " << t << endl; + throw error (); + } +} + +bool parser:: +decl (token& t) +{ + switch (t.type ()) + { + case token::t_keyword: + { + switch (t.keyword ()) + { + case token::k_namespace: + { + namespace_def (); + return true; + } + case token::k_class: + { + class_def (); + return true; + } + default: + break; + } + + break; + } + case token::t_punctuation: + { + if (t.punctuation () != token::p_lcbrace) + break; + } + // Fall through. + case token::t_string_lit: + { + scope_doc (t); + return true; + } + default: + break; + } + + return false; +} + +void parser:: +scope_doc (token& t) +{ + size_t ln (t.line ()), cl (t.column ()); + + // Use a counter to give scope-level docs unique names. We use a + // single counter throughout all units/scope because we could be + // reopening namespaces. 
+ // + if (t.type () == token::t_string_lit) + { + // string-literal + // + if (valid_) + { + // Enter each ""-enclosed string as a separate documentation + // entry, handle documentation variables. + // + const string& l (t.literal ()); + + char p ('\0'); + for (size_t b (0), e (1); e < l.size (); ++e) + { + if (l[e] == '"' && p != '\\') + { + string s (doc_string (l.c_str () + b, e - b + 1)); + + if (!s.empty ()) + { + doc& d (root_->new_node<doc> (*path_, ln, cl)); + + // See if this is a variable assignment: "\<var>=<val>". + // + size_t p (0); // '=' position. + if (s.size () >= 3 && s[0] == '\\' && s[1] != '\\') + { + for (p = 1; p != s.size (); ++p) + { + char c (s[p]); + + // Variable name should be a C identifier. + // + if (!(c == '_' || + ('a' <= c && c <= 'z') || + ('A' <= c && c <= 'Z') || + (p != 1 && '0' <= c && c <= '9'))) + break; + } + + if (p == s.size () || s[p] != '=' || p == 1) // Not a variable. + p = 0; + } + + if (p != 0) + { + root_->new_edge<names> ( + *scope_, d, "var: " + string (s, 1, p - 1)); + s = string (s, p + 1); + } + else + { + ostringstream os; + os << "doc: " << doc_count_++; + root_->new_edge<names> (*scope_, d, os.str ()); + } + + d.push_back (s); // move(). + } + + // If we have more, then make b point to the opening '"'. Second + // ++e in for() above will make e point to the character after it. + // + b = ++e; + continue; + } + + // We need to keep track of \\ escapings so we don't confuse + // them with \", as in \\". 
+ // + if (l[e] == '\\' && p == '\\') + p = '\0'; + else + p = l[e]; + } + } + } + else + { + // doc-string-seq + // + assert (t.punctuation () == token::p_lcbrace); + + doc* d (0); + if (valid_) + { + ostringstream os; + os << "doc: " << doc_count_++; + + d = &root_->new_node<doc> (*path_, ln, cl); + root_->new_edge<names> (*scope_, *d, os.str ()); + } + + for (t = lexer_->next ();; t = lexer_->next ()) + { + if (t.type () != token::t_string_lit) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected documentation string instead of " << t << endl; + throw error (); + } + + if (valid_) + d->push_back (doc_string (t.literal ().c_str (), + t.literal ().size ())); + + t = lexer_->next (); + + if (t.punctuation () != token::p_comma) + break; + } + + if (t.punctuation () != token::p_rcbrace) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected '}' instead of " << t << endl; + throw error (); + } + } +} + +void parser:: +namespace_def () +{ + token t (lexer_->next ()); + + if (t.type () != token::t_identifier) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected identifier instead of " << t << endl; + throw error (); + } + + auto_restore<scope> new_scope (scope_); + + if (valid_) + { + namespace_& n ( + root_->new_node<namespace_> (*path_, t.line (), t.column ())); + root_->new_edge<names> (*scope_, n, t.identifier ()); + new_scope.set (&n); + } + + t = lexer_->next (); + + if (t.punctuation () != token::p_lcbrace) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected '{' instead of " << t << endl; + throw error (); + } + + // decl-seq + // + t = lexer_->next (); + + while (decl (t)) + t = lexer_->next (); + + if (t.punctuation () != token::p_rcbrace) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected namespace, class, documentation, or '}' instead of " + << t << endl; + 
throw error (); + } +} + +void parser:: +class_def () +{ + token t (lexer_->next ()); + + if (t.type () != token::t_identifier) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected identifier instead of " << t << endl; + throw error (); + } + + class_* n (0); + if (valid_) + { + n = &root_->new_node<class_> (*path_, t.line (), t.column ()); + root_->new_edge<names> (*scope_, *n, t.identifier ()); + } + + t = lexer_->next (); + + // inheritance-spec + // + if (t.punctuation () == token::p_colon) + { + for (;;) + { + t = lexer_->next (); + size_t line (t.line ()), col (t.column ()); + + string name; + if (!qualified_name (t, name)) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected qualified name instead of " << t << endl; + throw error (); + } + + string ns; + + // If it is a fully-qualifed name, then start from the global namespace. + // Otherwise, from the current scope. + // + if (name[0] == ':') + name = string (name, 2, string::npos); + else + ns = scope_->fq_name (); + + if (class_* b = cur_->lookup<class_> (ns, name)) + root_->new_edge<inherits> (*n, *b); + else + { + cerr << *path_ << ':' << line << ':' << col << ": error: " + << "unable to resolve base class '" << name << "'" << endl; + valid_ = false; + } + + if (t.punctuation () != token::p_comma) + break; + } + } + + // abstract-spec + // + if (t.punctuation () == token::p_eq) + { + t = lexer_->next (); + + if (t.type () != token::t_int_lit || t.literal () != "0") + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected '0' instead of " << t << endl; + throw error (); + } + + if (n != 0) + n->abstract (true); + + t = lexer_->next (); + } + + if (t.punctuation () != token::p_lcbrace) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected '{' instead of " << t << endl; + throw error (); + } + + auto_restore<scope> new_scope (scope_, n); + + // 
class-decl-seq + // + t = lexer_->next (); + + for (;;) + { + try + { + if (t.type () == token::t_string_lit || + t.punctuation () == token::p_lcbrace) + { + scope_doc (t); + t = lexer_->next (); + } + else + { + if (!option_def (t)) + break; + } + } + catch (error const&) + { + valid_ = false; + recover (t); + } + } + + if (t.punctuation () != token::p_rcbrace) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected option, documentation, or '}' instead of " << t << endl; + throw error (); + } + + t = lexer_->next (); + + if (t.punctuation () != token::p_semi) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected ';' instead of " << t << endl; + throw error (); + } +} + +bool parser:: +option_def (token& t) +{ + size_t l (t.line ()), c (t.column ()); + + // type-spec + // + // These two functions set t to the next token if they return + // true. + // + string type_name; + + if (!qualified_name (t, type_name) && !fundamental_type (t, type_name)) + return false; + + option* o (0); + + if (valid_) + { + o = &root_->new_node<option> (*path_, l, c); + type& t (root_->new_type (*path_, l, c, type_name)); + root_->new_edge<belongs> (*o, t); + } + + // option-name-seq + // + names::name_list nl; + for (;;) + { + switch (t.type ()) + { + case token::t_identifier: + { + if (valid_) + nl.push_back (t.identifier ()); + + break; + } + case token::t_string_lit: + { + if (valid_) + { + // Get rid of '"'. + // + string r; + string const& l (t.literal ()); + char p ('\0'); + + for (size_t i (0), n (l.size ()); i < n; ++i) + { + if (l[i] == '"' && p != '\\') + continue; + + // We need to keep track of \\ escapings so we don't confuse + // them with \", as in "\\". 
+ // + if (l[i] == '\\' && p == '\\') + p = '\0'; + else + p = l[i]; + + r += l[i]; + } + + nl.push_back (r); + } + + break; + } + default: + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "option name expected instead of " << t << endl; + throw error (); + } + } + + t = lexer_->next (); + + if (t.punctuation () == token::p_or) + t = lexer_->next (); + else + break; + } + + if (valid_) + root_->new_edge<names> (*scope_, *o, nl); + + // initializer + // + std::string ev; + expression::expression_type et; + + if (t.punctuation () == token::p_eq) + { + // assignment initiaizer + // + t = lexer_->next (); + + l = t.line (); + c = t.column (); + + if (qualified_name (t, ev)) + { + et = expression::identifier; + } + else + { + switch (t.type ()) + { + case token::t_string_lit: + { + ev = t.literal (); + et = expression::string_lit; + t = lexer_->next (); + break; + } + case token::t_char_lit: + { + ev = t.literal (); + et = expression::char_lit; + t = lexer_->next (); + break; + } + case token::t_bool_lit: + { + ev = t.literal (); + et = expression::bool_lit; + t = lexer_->next (); + break; + } + case token::t_int_lit: + { + ev = t.literal (); + et = expression::int_lit; + t = lexer_->next (); + break; + } + case token::t_float_lit: + { + ev = t.literal (); + et = expression::float_lit; + t = lexer_->next (); + break; + } + case token::t_call_expr: + { + ev = t.expression (); + et = expression::call_expr; + t = lexer_->next (); + break; + } + default: + { + cerr << *path_ << ':' << t.line () << ':' << t.column () + << ": error: expected intializer instead of " << t << endl; + throw error (); + } + } + } + } + else if (t.type () == token::t_call_expr) + { + // c-tor initializer + // + l = t.line (); + c = t.column (); + + ev = t.expression (); + et = expression::call_expr; + t = lexer_->next (); + } + + if (valid_ && !ev.empty ()) + { + expression& e (root_->new_node<expression> (*path_, l, c, et, ev)); + root_->new_edge<initialized> (*o, 
e); + } + + // option-def-trailer + // + if (t.punctuation () == token::p_lcbrace) + { + // doc-string-seq + // + for (t = lexer_->next ();; t = lexer_->next ()) + { + if (t.type () != token::t_string_lit) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected documentation string instead of " << t << endl; + throw error (); + } + + if (valid_) + o->doc ().push_back (doc_string (t.literal ().c_str (), + t.literal ().size ())); + + t = lexer_->next (); + + if (t.punctuation () != token::p_comma) + break; + } + + if (t.punctuation () != token::p_rcbrace) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected '}' instead of " << t << endl; + throw error (); + } + + t = lexer_->next (); + + // Allow semicolon after option-doc for backwards compatibility. + // + if (t.punctuation () == token::p_semi) + t = lexer_->next (); + } + else + { + if (t.punctuation () != token::p_semi) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected ';' instead of " << t << endl; + throw error (); + } + + t = lexer_->next (); + } + + return true; +} + +string parser:: +doc_string (const char* l, size_t n) +{ + // Pass 1: get rid of " (as in "foo""bar"), convert \" to just ". + // + string t1, t2, t3; + char p ('\0'); // Previous character. + + for (size_t i (0); i < n; ++i) + { + char c (l[i]); + + if (c == '"') + { + if (p == '\\') + { + t1[t1.size () - 1] = '"'; // Replace \ with ". + p = c; + } + continue; + } + + // We need to keep track of \\ escapings so we don't confuse them with \", + // as in \\". + // + if (c == '\\' && p == '\\') + p = '\0'; + else + p = c; + + t1 += c; + } + + // Pass two: get rid of leading and trailing spaces in each line. Also + // handle pre-formatted fragments. + // + if (t1.size () != 0) + { + bool more (true); + size_t b (0), e, p; + + bool pre (false); + size_t m (0); // Number of leading spaces to remove in pre. 
+ + while (more) + { + p = e = t1.find ('\n', b); + + if (p == string::npos) + { + e = t1.size (); + more = false; + } + + if (b != e) // Unless this is just a single newline. + { + // In the pre mode we only remove up to m leading whitespaces. + // + { + size_t i (0); + while (b < e && + (t1[b] == 0x20 || t1[b] == 0x0D || t1[b] == 0x09) && + (!pre || i != m)) + { + ++b; + ++i; + } + + if (!pre) + m = i; + } + + --e; + while (e > b && (t1[e] == 0x20 || t1[e] == 0x0D || t1[e] == 0x09)) + --e; + + // Pre-formatted fragment marker or its escape. + // + if (t1[b] == '\\' && (b == e || (b + 1 == e && t1[e] == '\\'))) + { + // Use Start of Text (0x02) and End of Text (0x03) special + // characters as pre-formatted fragment markers. + // + if (b == e) + { + pre = !pre; + t2 += (pre ? 0x02 : 0x03); + } + else + t2 += "\\\\"; // Keep escaped. + } + else if (b <= e) + t2.append (t1, b, e - b + 1); + } + + if (more) + { + t2 += '\n'; + b = p + 1; + } + } + + if (pre) + { + cerr << *path_ << ": error: missing pre-formatted fragment end marker " + << "in documentation string '" << t1 << "'" << endl; + throw error (); + } + } + + // Pass 3: replace every single newline with single space and all multiple + // newlines (paragraph marker) with a single newline, unless we are in a + // pre-formatted fragment. Also process escapes in pre-formatted fragmens. + // + bool pre (false); + p = '\0'; // Previous character in pre-formatted fragment. + for (size_t i (0), n (t2.size ()); i < n; ++i) + { + char c (t2[i]); + + if (c == '\n' && !pre) + { + size_t j (i); + for (; i + 1 < n && t2[i + 1] == '\n'; ++i) ; + + if (j != 0 && i + 1 != n) // Strip leading and trailing newlines. + t3 += i != j ? '\n' : ' '; + } + else + { + if (c == (pre ? 0x03 : 0x02)) + { + pre = !pre; + + // Kill "inner" newlines (after opening and before closing '/' + // markers). Also check for "outer" newlines so that we always + // have paragraph separation. 
+ // + size_t k (t3.size ()); + if (pre) + { + if (k != 0 && t3[k - 1] != '\n') // Outer. + { + cerr << *path_ << ": error: missing empty line before pre-" + << "formatted fragment start marker in documentation " + << "string '" << t1 << "'" << endl; + throw error (); + } + + ++i; // Skip inner. + } + else + { + if (t3[k - 1] == '\n') // Could be the same as opening if empty. + t3.resize (k - 1); // Pop inner. + + if (i + 2 < n && (t2[i + 1] != '\n' || t2[i + 2] != '\n')) // Outer. + { + cerr << *path_ << ": error: missing empty line after pre-" + << "formatted fragment end marker in documentation " + << "string '" << t1 << "'" << endl; + throw error (); + } + } + + t3 += c; + continue; + } + + if (pre) + { + // In the pre-formatted fragments the only two escapes that we + // recognize are \" which was handled on pass 1 above and \\ which we + // handle here. + // + if (c == '\\' && p == '\\') + { + p = '\0'; // Keep the already added and clear. + continue; + } + + p = c; + } + + t3 += c; + } + } + + return t3; +} + + +bool parser:: +qualified_name (token& t, string& r) +{ + if (t.type () != token::t_identifier && t.punctuation () != token::p_dcolon) + return false; + + r.clear (); + + if (t.punctuation () == token::p_dcolon) + { + r += "::"; + t = lexer_->next (); + } + + for (;;) + { + if (t.type () != token::t_identifier) + { + cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " + << "expected identifier after '::'" << endl; + throw error (); + } + + r += t.identifier (); + t = lexer_->next (); + + if (t.type () == token::t_template_expr) + { + // Template-id. + // + r += t.expression (); + t = lexer_->next (); + } + + if (t.punctuation () == token::p_dcolon) + { + r += "::"; + t = lexer_->next (); + } + else + break; + } + + return true; +} + +bool parser:: +fundamental_type (token& t, string& r) +{ + r.clear (); + + switch (t.keyword ()) + { + case token::k_signed: + case token::k_unsigned: + { + r = t.keyword () == token::k_signed ? 
"signed" : "unsigned"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_short: + { + r += " short"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_int: + { + r += " int"; + t = lexer_->next (); + } + default: + break; + } + break; + } + case token::k_long: + { + r += " long"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_int: + { + r += " int"; + t = lexer_->next (); + break; + } + case token::k_long: + { + r += " long"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_int: + { + r += " int"; + t = lexer_->next (); + } + default: + break; + } + break; + } + default: + break; + } + break; + } + case token::k_int: + { + r += " int"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_short: + { + r += " short"; + t = lexer_->next (); + break; + } + case token::k_long: + { + r += " long"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_long: + { + r += " long"; + t = lexer_->next (); + } + default: + break; + } + break; + } + default: + break; + } + break; + } + case token::k_char: + { + r += " char"; + t = lexer_->next (); + break; + } + default: + break; + } + break; + } + case token::k_short: + case token::k_long: + { + bool l (t.keyword () == token::k_long); + r = l ? "long" : "short"; + + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_signed: + case token::k_unsigned: + { + r += t.keyword () == token::k_signed ? " signed" : " unsigned"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_int: + { + r += " int"; + t = lexer_->next (); + } + default: + break; + } + break; + } + case token::k_long: + { + r += " long"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_signed: + case token::k_unsigned: + { + r += t.keyword () == token::k_signed ? 
" signed" : " unsigned"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_int: + { + r += " int"; + t = lexer_->next (); + } + default: + break; + } + break; + } + case token::k_int: + { + r += " int"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_signed: + { + r += " signed"; + t = lexer_->next (); + break; + } + case token::k_unsigned: + { + r += " unsigned"; + t = lexer_->next (); + break; + } + default: + break; + } + break; + } + default: + break; + } + break; + } + case token::k_int: + { + r += " int"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_signed: + { + r += " signed"; + t = lexer_->next (); + break; + } + case token::k_unsigned: + { + r += " unsigned"; + t = lexer_->next (); + break; + } + default: + break; + } + break; + } + case token::k_double: + { + if (l) + { + r += " double"; + t = lexer_->next (); + } + break; + } + default: + break; + } + break; + } + case token::k_int: + { + r = "int"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_signed: + case token::k_unsigned: + { + r += t.keyword () == token::k_signed ? 
" signed" : " unsigned"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_short: + { + r += " short"; + t = lexer_->next (); + break; + } + case token::k_long: + { + r += " long"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_long: + { + r += " long"; + t = lexer_->next (); + } + default: + break; + } + } + default: + break; + } + break; + } + case token::k_short: + { + r += " short"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_signed: + { + r += " signed"; + t = lexer_->next (); + break; + } + case token::k_unsigned: + { + r += " unsigned"; + t = lexer_->next (); + break; + } + default: + break; + } + break; + } + case token::k_long: + { + r += " long"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_signed: + { + r += " signed"; + t = lexer_->next (); + break; + } + case token::k_unsigned: + { + r += " unsigned"; + t = lexer_->next (); + break; + } + case token::k_long: + { + r += " long"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_signed: + { + r += " signed"; + t = lexer_->next (); + break; + } + case token::k_unsigned: + { + r += " unsigned"; + t = lexer_->next (); + break; + } + default: + break; + } + break; + } + default: + break; + } + break; + } + default: + break; + } + break; + } + case token::k_char: + { + r = "char"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_signed: + { + r += " signed"; + t = lexer_->next (); + break; + } + case token::k_unsigned: + { + r += " unsigned"; + t = lexer_->next (); + break; + } + default: + break; + } + break; + } + case token::k_bool: + { + r = "bool"; + t = lexer_->next (); + break; + } + case token::k_wchar: + { + r = "wchar_t"; + t = lexer_->next (); + break; + } + case token::k_float: + { + r = "float"; + t = lexer_->next (); + break; + } + case token::k_double: + { + r = "double"; + switch ((t = lexer_->next ()).keyword ()) + { + case token::k_long: + { + r += " long"; + t = lexer_->next (); + } + default: 
+ break; + } + break; + } + default: + return false; + } + + return true; +} |