diff options
author | Karen Arutyunov <karen@codesynthesis.com> | 2020-04-08 14:51:57 +0300 |
---|---|---|
committer | Karen Arutyunov <karen@codesynthesis.com> | 2020-04-27 11:38:53 +0300 |
commit | 720c5a33b6a49cf328fdd7611f49153cf8f60247 (patch) | |
tree | 9725f3d1f42ec90fde84520f49647edea013ce5e /cli/parser.cxx | |
parent | 3183f3bb927a90783ae0aeaf190a0919377aabe4 (diff) |
Separate tests and examples into individual packages
Also make the cli module be explicitly enabled via the config.cli configuration
variable.
Diffstat (limited to 'cli/parser.cxx')
-rw-r--r-- | cli/parser.cxx | 1728 |
1 file changed, 0 insertions, 1728 deletions
diff --git a/cli/parser.cxx b/cli/parser.cxx deleted file mode 100644 index 4685edc..0000000 --- a/cli/parser.cxx +++ /dev/null @@ -1,1728 +0,0 @@ -// file : cli/parser.cxx -// author : Boris Kolpackov <boris@codesynthesis.com> -// license : MIT; see accompanying LICENSE file - -#ifndef _WIN32 -# include <unistd.h> // stat -# include <sys/types.h> // stat -# include <sys/stat.h> // stat -#else -# include <sys/types.h> // _stat -# include <sys/stat.h> // _stat(), S_I* - -# ifdef _MSC_VER // Unlikely to be fixed in newer versions. -# define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) -# endif -#endif - -#include <fstream> -#include <sstream> -#include <iostream> - -#include <cli/token.hxx> -#include <cli/lexer.hxx> -#include <cli/parser.hxx> - -#include <cli/semantics.hxx> - -using namespace std; -using namespace semantics; - -// Check that the file exist without checking for permissions, etc. -// -inline static bool -file_exists (const path& p) -{ -#ifndef _WIN32 - struct stat s; - int r (stat (p.string ().c_str (), &s)); -#else - struct _stat s; - int r (_stat (p.string ().c_str (), &s)); -#endif - - return r == 0 && S_ISREG (s.st_mode); -} - -const char* keywords[] = -{ - "include", - "namespace", - "class", - "signed", - "unsigned", - "bool", - "char", - "wchar_t", - "short", - "int", - "long", - "float", - "double" -}; - -const char* punctuation[] = { - ";", ",", ":", "::", "{", "}", /*"(", ")",*/ "=", "|"}; - -// Output the token type and value in a format suitable for diagnostics. 
-// -std::ostream& -operator<< (std::ostream& os, token const& t) -{ - switch (t.type ()) - { - case token::t_eos: - { - os << "end-of-stream"; - break; - } - case token::t_keyword: - { - os << "keyword '" << keywords[t.keyword ()] << "'"; - break; - } - case token::t_identifier: - { - os << "identifier '" << t.identifier () << "'"; - break; - } - case token::t_punctuation: - { - os << "'" << punctuation[t.punctuation ()] << "'"; - break; - } - case token::t_cxx_path_lit: - { - os << "c++ path literal"; - break; - } - case token::t_cli_path_lit: - { - os << "cli path literal"; - break; - } - case token::t_string_lit: - { - os << "string literal"; - break; - } - case token::t_char_lit: - { - os << "char literal"; - break; - } - case token::t_bool_lit: - { - os << "bool literal"; - break; - } - case token::t_int_lit: - { - os << "integer literal"; - break; - } - case token::t_float_lit: - { - os << "floating point literal"; - break; - } - case token::t_call_expr: - { - os << "call expression"; - break; - } - case token::t_template_expr: - { - os << "template expression"; - break; - } - } - - return os; -} - -// RAII-style set new value on construction, restore old one on destruction. -// -template <typename T> -struct auto_restore -{ - auto_restore (T*& var, T* new_val = 0) - : var_ (var), old_val_ (var_) - { - if (new_val != 0) - var_ = new_val; - } - - void - set (T* new_val) {var_ = new_val;} - - ~auto_restore () {var_ = old_val_;} - -private: - T*& var_; - T* old_val_; -}; - - -void parser:: -recover (token& t) -{ - // Recover by skipping past next ';' or '}'. 
- // - for (;; t = lexer_->next ()) - { - if (t.type () == token::t_eos) - break; - - token::punctuation_type p (t.punctuation ()); - - if (p == token::p_semi || p == token::p_rcbrace) - { - t = lexer_->next (); - break; - } - } -} - -unique_ptr<cli_unit> parser:: -parse (std::istream& is, path const& p) -{ - unique_ptr<cli_unit> unit (new cli_unit (p, 1, 1)); - - { - path ap (p); - ap.absolute (); - ap.normalize (); - include_map_[ap] = unit.get (); - } - - root_ = cur_ = unit.get (); - - lexer l (is, p.string ()); - lexer_ = &l; - - doc_count_ = 0; - - path_ = &p; - valid_ = true; - - def_unit (); - - if (!valid_ || !l.valid ()) - throw invalid_input (); - - return unit; -} - -void parser:: -def_unit () -{ - token t (lexer_->next ()); - - // include-decl-seq - // - for (token::keyword_type k (t.keyword ()); - k == token::k_include || k == token::k_source; - k = t.keyword ()) - { - try - { - if (k == token::k_include) - include_decl (); - else - source_decl (); - - t = lexer_->next (); - } - catch (error const&) - { - valid_ = false; - recover (t); - } - } - - auto_restore<scope> new_scope (scope_, cur_); - - // decl-seq - // - while (t.type () != token::t_eos) - { - try - { - if (t.keyword () == token::k_source) - { - try - { - source_decl (); - t = lexer_->next (); - } - catch (error const&) - { - valid_ = false; - recover (t); - } - - continue; - } - - if (decl (t)) - { - t = lexer_->next (); - continue; - } - - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected namespace, class, or documentation instead of " - << t << endl; - throw error (); - } - catch (error const&) - { - valid_ = false; - break; // Non-recoverable error. 
- } - } -} - -void parser:: -source_decl () -{ - token t (lexer_->next ()); - - if (t.type () != token::t_cli_path_lit) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected cli path literal instead of " << t << endl; - throw error (); - } - - string const& l (t.literal ()); - bool q (l[0] == '"'); // Quote or braket include? - - path f; - try - { - f = path (string (l, 1, l.size () - 2)); - } - catch (const invalid_path& e) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "'" << e.path () << "' is not a valid filesystem path" << endl; - valid_ = false; - } - - if (valid_) - { - path p; - - // If this is a quote include, then include relative to the current - // file. - // - if (q) - { - p = path_->directory () / f; - p.normalize (); - } - // Otherwise search the include directories (-I). - // - else - { - for (paths::const_iterator i (include_paths_.begin ()); - i != include_paths_.end (); ++i) - { - p = *i / f; - p.normalize (); - - if (file_exists (p)) - break; - - p.clear (); - } - - if (p.empty ()) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": " - << "error: file '" << f << "' not found in any of the " - << "include search directories (-I)" << endl; - valid_ = false; - } - } - - if (valid_) - { - auto_restore<path const> new_path (path_, &p); - - ifstream ifs (p.string ().c_str ()); - if (ifs.is_open ()) - { - ifs.exceptions (ifstream::failbit | ifstream::badbit); - - try - { - lexer l (ifs, p.string ()); - auto_restore<lexer> new_lexer (lexer_, &l); - - def_unit (); - - if (!l.valid ()) - valid_ = false; - } - catch (std::ios_base::failure const&) - { - cerr << p << ": error: read failure" << endl; - valid_ = false; - } - } - else - { - cerr << p << ": error: unable to open in read mode" << endl; - valid_ = false; - } - } - } - - t = lexer_->next (); - - if (t.punctuation () != token::p_semi) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": 
error: " - << "expected ';' instead of " << t << endl; - throw error (); - } -} - -void parser:: -include_decl () -{ - token t (lexer_->next ()); - token::token_type tt (t.type ()); - - if (tt != token::t_cxx_path_lit && tt != token::t_cli_path_lit) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected path literal instead of " << t << endl; - throw error (); - } - - string const& l (t.literal ()); - includes::kind_type ik (l[0] == '<' ? includes::bracket : includes::quote); - - path f; - try - { - f = path (string (l, 1, l.size () - 2)); - } - catch (const invalid_path& e) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "'" << e.path () << "' is not a valid filesystem path" << endl; - valid_ = false; - } - - if (valid_) - { - if (tt == token::t_cxx_path_lit) - { - cxx_unit& n ( - root_->new_node<cxx_unit> (*path_, t.line (), t.column ())); - root_->new_edge<cxx_includes> (*cur_, n, ik, f); - } - else - { - path p; - // If this is a quote include, then include relative to the current - // file. - // - if (ik == includes::quote) - { - p = path_->directory () / f; - p.normalize (); - } - // Otherwise search the include directories (-I). - // - else - { - for (paths::const_iterator i (include_paths_.begin ()); - i != include_paths_.end (); ++i) - { - p = *i / f; - p.normalize (); - - if (file_exists (p)) - break; - - p.clear (); - } - - if (p.empty ()) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": " - << "error: file '" << f << "' not found in any of the " - << "include search directories (-I)" << endl; - valid_ = false; - } - } - - if (valid_) - { - // Detect and ignore multiple inclusions. 
- // - path ap (p); - ap.absolute (); - ap.normalize (); - - include_map::iterator it (include_map_.find (ap)); - if (it == include_map_.end ()) - { - cli_unit& n (root_->new_node<cli_unit> (p, 1, 1)); - root_->new_edge<cli_includes> (*cur_, n, ik, f); - include_map_[ap] = &n; - - auto_restore<cli_unit> new_cur (cur_, &n); - auto_restore<path const> new_path (path_, &p); - - ifstream ifs (p.string ().c_str ()); - if (ifs.is_open ()) - { - ifs.exceptions (ifstream::failbit | ifstream::badbit); - - try - { - lexer l (ifs, p.string ()); - auto_restore<lexer> new_lexer (lexer_, &l); - - def_unit (); - - if (!l.valid ()) - valid_ = false; - } - catch (std::ios_base::failure const&) - { - cerr << p << ": error: read failure" << endl; - valid_ = false; - } - } - else - { - cerr << p << ": error: unable to open in read mode" << endl; - valid_ = false; - } - } - else - root_->new_edge<cli_includes> (*cur_, *it->second, ik, f); - } - } - } - - t = lexer_->next (); - - if (t.punctuation () != token::p_semi) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected ';' instead of " << t << endl; - throw error (); - } -} - -bool parser:: -decl (token& t) -{ - switch (t.type ()) - { - case token::t_keyword: - { - switch (t.keyword ()) - { - case token::k_namespace: - { - namespace_def (); - return true; - } - case token::k_class: - { - class_def (); - return true; - } - default: - break; - } - - break; - } - case token::t_punctuation: - { - if (t.punctuation () != token::p_lcbrace) - break; - } - // Fall through. - case token::t_string_lit: - { - scope_doc (t); - return true; - } - default: - break; - } - - return false; -} - -void parser:: -scope_doc (token& t) -{ - size_t ln (t.line ()), cl (t.column ()); - - // Use a counter to give scope-level docs unique names. We use a - // single counter throughout all units/scope because we could be - // reopening namespaces. 
- // - if (t.type () == token::t_string_lit) - { - // string-literal - // - if (valid_) - { - // Enter each ""-enclosed string as a separate documentation - // entry, handle documentation variables. - // - const string& l (t.literal ()); - - char p ('\0'); - for (size_t b (0), e (1); e < l.size (); ++e) - { - if (l[e] == '"' && p != '\\') - { - string s (doc_string (l.c_str () + b, e - b + 1)); - - if (!s.empty ()) - { - doc& d (root_->new_node<doc> (*path_, ln, cl)); - - // See if this is a variable assignment: "\<var>=<val>". - // - size_t p (0); // '=' position. - if (s.size () >= 3 && s[0] == '\\' && s[1] != '\\') - { - for (p = 1; p != s.size (); ++p) - { - char c (s[p]); - - // Variable name should be a C identifier. - // - if (!(c == '_' || - ('a' <= c && c <= 'z') || - ('A' <= c && c <= 'Z') || - (p != 1 && '0' <= c && c <= '9'))) - break; - } - - if (p == s.size () || s[p] != '=' || p == 1) // Not a variable. - p = 0; - } - - if (p != 0) - { - root_->new_edge<names> ( - *scope_, d, "var: " + string (s, 1, p - 1)); - s = string (s, p + 1); - } - else - { - ostringstream os; - os << "doc: " << doc_count_++; - root_->new_edge<names> (*scope_, d, os.str ()); - } - - d.push_back (s); // move(). - } - - // If we have more, then make b point to the opening '"'. Second - // ++e in for() above will make e point to the character after it. - // - b = ++e; - continue; - } - - // We need to keep track of \\ escapings so we don't confuse - // them with \", as in \\". 
- // - if (l[e] == '\\' && p == '\\') - p = '\0'; - else - p = l[e]; - } - } - } - else - { - // doc-string-seq - // - assert (t.punctuation () == token::p_lcbrace); - - doc* d (0); - if (valid_) - { - ostringstream os; - os << "doc: " << doc_count_++; - - d = &root_->new_node<doc> (*path_, ln, cl); - root_->new_edge<names> (*scope_, *d, os.str ()); - } - - for (t = lexer_->next ();; t = lexer_->next ()) - { - if (t.type () != token::t_string_lit) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected documentation string instead of " << t << endl; - throw error (); - } - - if (valid_) - d->push_back (doc_string (t.literal ().c_str (), - t.literal ().size ())); - - t = lexer_->next (); - - if (t.punctuation () != token::p_comma) - break; - } - - if (t.punctuation () != token::p_rcbrace) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected '}' instead of " << t << endl; - throw error (); - } - } -} - -void parser:: -namespace_def () -{ - token t (lexer_->next ()); - - if (t.type () != token::t_identifier) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected identifier instead of " << t << endl; - throw error (); - } - - auto_restore<scope> new_scope (scope_); - - if (valid_) - { - namespace_& n ( - root_->new_node<namespace_> (*path_, t.line (), t.column ())); - root_->new_edge<names> (*scope_, n, t.identifier ()); - new_scope.set (&n); - } - - t = lexer_->next (); - - if (t.punctuation () != token::p_lcbrace) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected '{' instead of " << t << endl; - throw error (); - } - - // decl-seq - // - t = lexer_->next (); - - while (decl (t)) - t = lexer_->next (); - - if (t.punctuation () != token::p_rcbrace) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected namespace, class, documentation, or '}' instead of " - << t << endl; - 
throw error (); - } -} - -void parser:: -class_def () -{ - token t (lexer_->next ()); - - if (t.type () != token::t_identifier) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected identifier instead of " << t << endl; - throw error (); - } - - class_* n (0); - if (valid_) - { - n = &root_->new_node<class_> (*path_, t.line (), t.column ()); - root_->new_edge<names> (*scope_, *n, t.identifier ()); - } - - t = lexer_->next (); - - // inheritance-spec - // - if (t.punctuation () == token::p_colon) - { - for (;;) - { - t = lexer_->next (); - size_t line (t.line ()), col (t.column ()); - - string name; - if (!qualified_name (t, name)) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected qualified name instead of " << t << endl; - throw error (); - } - - string ns; - - // If it is a fully-qualifed name, then start from the global namespace. - // Otherwise, from the current scope. - // - if (name[0] == ':') - name = string (name, 2, string::npos); - else - ns = scope_->fq_name (); - - if (class_* b = cur_->lookup<class_> (ns, name)) - root_->new_edge<inherits> (*n, *b); - else - { - cerr << *path_ << ':' << line << ':' << col << ": error: " - << "unable to resolve base class '" << name << "'" << endl; - valid_ = false; - } - - if (t.punctuation () != token::p_comma) - break; - } - } - - // abstract-spec - // - if (t.punctuation () == token::p_eq) - { - t = lexer_->next (); - - if (t.type () != token::t_int_lit || t.literal () != "0") - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected '0' instead of " << t << endl; - throw error (); - } - - if (n != 0) - n->abstract (true); - - t = lexer_->next (); - } - - if (t.punctuation () != token::p_lcbrace) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected '{' instead of " << t << endl; - throw error (); - } - - auto_restore<scope> new_scope (scope_, n); - - // 
class-decl-seq - // - t = lexer_->next (); - - for (;;) - { - try - { - if (t.type () == token::t_string_lit || - t.punctuation () == token::p_lcbrace) - { - scope_doc (t); - t = lexer_->next (); - } - else - { - if (!option_def (t)) - break; - } - } - catch (error const&) - { - valid_ = false; - recover (t); - } - } - - if (t.punctuation () != token::p_rcbrace) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected option, documentation, or '}' instead of " << t << endl; - throw error (); - } - - t = lexer_->next (); - - if (t.punctuation () != token::p_semi) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected ';' instead of " << t << endl; - throw error (); - } -} - -bool parser:: -option_def (token& t) -{ - size_t l (t.line ()), c (t.column ()); - - // type-spec - // - // These two functions set t to the next token if they return - // true. - // - string type_name; - - if (!qualified_name (t, type_name) && !fundamental_type (t, type_name)) - return false; - - option* o (0); - - if (valid_) - { - o = &root_->new_node<option> (*path_, l, c); - type& t (root_->new_type (*path_, l, c, type_name)); - root_->new_edge<belongs> (*o, t); - } - - // option-name-seq - // - names::name_list nl; - for (;;) - { - switch (t.type ()) - { - case token::t_identifier: - { - if (valid_) - nl.push_back (t.identifier ()); - - break; - } - case token::t_string_lit: - { - if (valid_) - { - // Get rid of '"'. - // - string r; - string const& l (t.literal ()); - char p ('\0'); - - for (size_t i (0), n (l.size ()); i < n; ++i) - { - if (l[i] == '"' && p != '\\') - continue; - - // We need to keep track of \\ escapings so we don't confuse - // them with \", as in "\\". 
- // - if (l[i] == '\\' && p == '\\') - p = '\0'; - else - p = l[i]; - - r += l[i]; - } - - nl.push_back (r); - } - - break; - } - default: - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "option name expected instead of " << t << endl; - throw error (); - } - } - - t = lexer_->next (); - - if (t.punctuation () == token::p_or) - t = lexer_->next (); - else - break; - } - - if (valid_) - root_->new_edge<names> (*scope_, *o, nl); - - // initializer - // - std::string ev; - expression::expression_type et; - - if (t.punctuation () == token::p_eq) - { - // assignment initiaizer - // - t = lexer_->next (); - - l = t.line (); - c = t.column (); - - if (qualified_name (t, ev)) - { - et = expression::identifier; - } - else - { - switch (t.type ()) - { - case token::t_string_lit: - { - ev = t.literal (); - et = expression::string_lit; - t = lexer_->next (); - break; - } - case token::t_char_lit: - { - ev = t.literal (); - et = expression::char_lit; - t = lexer_->next (); - break; - } - case token::t_bool_lit: - { - ev = t.literal (); - et = expression::bool_lit; - t = lexer_->next (); - break; - } - case token::t_int_lit: - { - ev = t.literal (); - et = expression::int_lit; - t = lexer_->next (); - break; - } - case token::t_float_lit: - { - ev = t.literal (); - et = expression::float_lit; - t = lexer_->next (); - break; - } - case token::t_call_expr: - { - ev = t.expression (); - et = expression::call_expr; - t = lexer_->next (); - break; - } - default: - { - cerr << *path_ << ':' << t.line () << ':' << t.column () - << ": error: expected intializer instead of " << t << endl; - throw error (); - } - } - } - } - else if (t.type () == token::t_call_expr) - { - // c-tor initializer - // - l = t.line (); - c = t.column (); - - ev = t.expression (); - et = expression::call_expr; - t = lexer_->next (); - } - - if (valid_ && !ev.empty ()) - { - expression& e (root_->new_node<expression> (*path_, l, c, et, ev)); - root_->new_edge<initialized> (*o, 
e); - } - - // option-def-trailer - // - if (t.punctuation () == token::p_lcbrace) - { - // doc-string-seq - // - for (t = lexer_->next ();; t = lexer_->next ()) - { - if (t.type () != token::t_string_lit) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected documentation string instead of " << t << endl; - throw error (); - } - - if (valid_) - o->doc ().push_back (doc_string (t.literal ().c_str (), - t.literal ().size ())); - - t = lexer_->next (); - - if (t.punctuation () != token::p_comma) - break; - } - - if (t.punctuation () != token::p_rcbrace) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected '}' instead of " << t << endl; - throw error (); - } - - t = lexer_->next (); - - // Allow semicolon after option-doc for backwards compatibility. - // - if (t.punctuation () == token::p_semi) - t = lexer_->next (); - } - else - { - if (t.punctuation () != token::p_semi) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected ';' instead of " << t << endl; - throw error (); - } - - t = lexer_->next (); - } - - return true; -} - -string parser:: -doc_string (const char* l, size_t n) -{ - // Pass 1: get rid of " (as in "foo""bar"), convert \" to just ". - // - string t1, t2, t3; - char p ('\0'); // Previous character. - - for (size_t i (0); i < n; ++i) - { - char c (l[i]); - - if (c == '"') - { - if (p == '\\') - { - t1[t1.size () - 1] = '"'; // Replace \ with ". - p = c; - } - continue; - } - - // We need to keep track of \\ escapings so we don't confuse them with \", - // as in \\". - // - if (c == '\\' && p == '\\') - p = '\0'; - else - p = c; - - t1 += c; - } - - // Pass two: get rid of leading and trailing spaces in each line. Also - // handle pre-formatted fragments. - // - if (t1.size () != 0) - { - bool more (true); - size_t b (0), e, p; - - bool pre (false); - size_t m (0); // Number of leading spaces to remove in pre. 
- - while (more) - { - p = e = t1.find ('\n', b); - - if (p == string::npos) - { - e = t1.size (); - more = false; - } - - if (b != e) // Unless this is just a single newline. - { - // In the pre mode we only remove up to m leading whitespaces. - // - { - size_t i (0); - while (b < e && - (t1[b] == 0x20 || t1[b] == 0x0D || t1[b] == 0x09) && - (!pre || i != m)) - { - ++b; - ++i; - } - - if (!pre) - m = i; - } - - --e; - while (e > b && (t1[e] == 0x20 || t1[e] == 0x0D || t1[e] == 0x09)) - --e; - - // Pre-formatted fragment marker or its escape. - // - if (t1[b] == '\\' && (b == e || (b + 1 == e && t1[e] == '\\'))) - { - // Use Start of Text (0x02) and End of Text (0x03) special - // characters as pre-formatted fragment markers. - // - if (b == e) - { - pre = !pre; - t2 += (pre ? 0x02 : 0x03); - } - else - t2 += "\\\\"; // Keep escaped. - } - else if (b <= e) - t2.append (t1, b, e - b + 1); - } - - if (more) - { - t2 += '\n'; - b = p + 1; - } - } - - if (pre) - { - cerr << *path_ << ": error: missing pre-formatted fragment end marker " - << "in documentation string '" << t1 << "'" << endl; - throw error (); - } - } - - // Pass 3: replace every single newline with single space and all multiple - // newlines (paragraph marker) with a single newline, unless we are in a - // pre-formatted fragment. Also process escapes in pre-formatted fragmens. - // - bool pre (false); - p = '\0'; // Previous character in pre-formatted fragment. - for (size_t i (0), n (t2.size ()); i < n; ++i) - { - char c (t2[i]); - - if (c == '\n' && !pre) - { - size_t j (i); - for (; i + 1 < n && t2[i + 1] == '\n'; ++i) ; - - if (j != 0 && i + 1 != n) // Strip leading and trailing newlines. - t3 += i != j ? '\n' : ' '; - } - else - { - if (c == (pre ? 0x03 : 0x02)) - { - pre = !pre; - - // Kill "inner" newlines (after opening and before closing '/' - // markers). Also check for "outer" newlines so that we always - // have paragraph separation. 
- // - size_t k (t3.size ()); - if (pre) - { - if (k != 0 && t3[k - 1] != '\n') // Outer. - { - cerr << *path_ << ": error: missing empty line before pre-" - << "formatted fragment start marker in documentation " - << "string '" << t1 << "'" << endl; - throw error (); - } - - ++i; // Skip inner. - } - else - { - if (t3[k - 1] == '\n') // Could be the same as opening if empty. - t3.resize (k - 1); // Pop inner. - - if (i + 2 < n && (t2[i + 1] != '\n' || t2[i + 2] != '\n')) // Outer. - { - cerr << *path_ << ": error: missing empty line after pre-" - << "formatted fragment end marker in documentation " - << "string '" << t1 << "'" << endl; - throw error (); - } - } - - t3 += c; - continue; - } - - if (pre) - { - // In the pre-formatted fragments the only two escapes that we - // recognize are \" which was handled on pass 1 above and \\ which we - // handle here. - // - if (c == '\\' && p == '\\') - { - p = '\0'; // Keep the already added and clear. - continue; - } - - p = c; - } - - t3 += c; - } - } - - return t3; -} - - -bool parser:: -qualified_name (token& t, string& r) -{ - if (t.type () != token::t_identifier && t.punctuation () != token::p_dcolon) - return false; - - r.clear (); - - if (t.punctuation () == token::p_dcolon) - { - r += "::"; - t = lexer_->next (); - } - - for (;;) - { - if (t.type () != token::t_identifier) - { - cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: " - << "expected identifier after '::'" << endl; - throw error (); - } - - r += t.identifier (); - t = lexer_->next (); - - if (t.type () == token::t_template_expr) - { - // Template-id. - // - r += t.expression (); - t = lexer_->next (); - } - - if (t.punctuation () == token::p_dcolon) - { - r += "::"; - t = lexer_->next (); - } - else - break; - } - - return true; -} - -bool parser:: -fundamental_type (token& t, string& r) -{ - r.clear (); - - switch (t.keyword ()) - { - case token::k_signed: - case token::k_unsigned: - { - r = t.keyword () == token::k_signed ? 
"signed" : "unsigned"; - switch ((t = lexer_->next ()).keyword ()) - { - case token::k_short: - { - r += " short"; - switch ((t = lexer_->next ()).keyword ()) - { - case token::k_int: - { - r += " int"; - t = lexer_->next (); - } - default: - break; - } - break; - } - case token::k_long: - { - r += " long"; - switch ((t = lexer_->next ()).keyword ()) - { - case token::k_int: - { - r += " int"; - t = lexer_->next (); - break; - } - case token::k_long: - { - r += " long"; - switch ((t = lexer_->next ()).keyword ()) - { - case token::k_int: - { - r += " int"; - t = lexer_->next (); - } - default: - break; - } - break; - } - default: - break; - } - break; - } - case token::k_int: - { - r += " int"; - switch ((t = lexer_->next ()).keyword ()) - { - case token::k_short: - { - r += " short"; - t = lexer_->next (); - break; - } - case token::k_long: - { - r += " long"; - switch ((t = lexer_->next ()).keyword ()) - { - case token::k_long: - { - r += " long"; - t = lexer_->next (); - } - default: - break; - } - break; - } - default: - break; - } - break; - } - case token::k_char: - { - r += " char"; - t = lexer_->next (); - break; - } - default: - break; - } - break; - } - case token::k_short: - case token::k_long: - { - bool l (t.keyword () == token::k_long); - r = l ? "long" : "short"; - - switch ((t = lexer_->next ()).keyword ()) - { - case token::k_signed: - case token::k_unsigned: - { - r += t.keyword () == token::k_signed ? " signed" : " unsigned"; - switch ((t = lexer_->next ()).keyword ()) - { - case token::k_int: - { - r += " int"; - t = lexer_->next (); - } - default: - break; - } - break; - } - case token::k_long: - { - r += " long"; - switch ((t = lexer_->next ()).keyword ()) - { - case token::k_signed: - case token::k_unsigned: - { - r += t.keyword () == token::k_signed ? 
" signed" : " unsigned"; - switch ((t = lexer_->next ()).keyword ()) - { - case token::k_int: - { - r += " int"; - t = lexer_->next (); - } - default: - break; - } - break; - } - case token::k_int: - { - r += " int"; - switch ((t = lexer_->next ()).keyword ()) - { - case token::k_signed: - { - r += " signed"; - t = lexer_->next (); - break; - } - case token::k_unsigned: - { - r += " unsigned"; - t = lexer_->next (); - break; - } - default: - break; - } - break; - } - default: - break; - } - break; - } - case token::k_int: - { - r += " int"; - switch ((t = lexer_->next ()).keyword ()) - { - case token::k_signed: - { - r += " signed"; - t = lexer_->next (); - break; - } - case token::k_unsigned: - { - r += " unsigned"; - t = lexer_->next (); - break; - } - default: - break; - } - break; - } - case token::k_double: - { - if (l) - { - r += " double"; - t = lexer_->next (); - } - break; - } - default: - break; - } - break; - } - case token::k_int: - { - r = "int"; - switch ((t = lexer_->next ()).keyword ()) - { - case token::k_signed: - case token::k_unsigned: - { - r += t.keyword () == token::k_signed ? 
" signed" : " unsigned"; - switch ((t = lexer_->next ()).keyword ()) - { - case token::k_short: - { - r += " short"; - t = lexer_->next (); - break; - } - case token::k_long: - { - r += " long"; - switch ((t = lexer_->next ()).keyword ()) - { - case token::k_long: - { - r += " long"; - t = lexer_->next (); - } - default: - break; - } - } - default: - break; - } - break; - } - case token::k_short: - { - r += " short"; - switch ((t = lexer_->next ()).keyword ()) - { - case token::k_signed: - { - r += " signed"; - t = lexer_->next (); - break; - } - case token::k_unsigned: - { - r += " unsigned"; - t = lexer_->next (); - break; - } - default: - break; - } - break; - } - case token::k_long: - { - r += " long"; - switch ((t = lexer_->next ()).keyword ()) - { - case token::k_signed: - { - r += " signed"; - t = lexer_->next (); - break; - } - case token::k_unsigned: - { - r += " unsigned"; - t = lexer_->next (); - break; - } - case token::k_long: - { - r += " long"; - switch ((t = lexer_->next ()).keyword ()) - { - case token::k_signed: - { - r += " signed"; - t = lexer_->next (); - break; - } - case token::k_unsigned: - { - r += " unsigned"; - t = lexer_->next (); - break; - } - default: - break; - } - break; - } - default: - break; - } - break; - } - default: - break; - } - break; - } - case token::k_char: - { - r = "char"; - switch ((t = lexer_->next ()).keyword ()) - { - case token::k_signed: - { - r += " signed"; - t = lexer_->next (); - break; - } - case token::k_unsigned: - { - r += " unsigned"; - t = lexer_->next (); - break; - } - default: - break; - } - break; - } - case token::k_bool: - { - r = "bool"; - t = lexer_->next (); - break; - } - case token::k_wchar: - { - r = "wchar_t"; - t = lexer_->next (); - break; - } - case token::k_float: - { - r = "float"; - t = lexer_->next (); - break; - } - case token::k_double: - { - r = "double"; - switch ((t = lexer_->next ()).keyword ()) - { - case token::k_long: - { - r += " long"; - t = lexer_->next (); - } - default: 
- break; - } - break; - } - default: - return false; - } - - return true; -} |