From 720c5a33b6a49cf328fdd7611f49153cf8f60247 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Wed, 8 Apr 2020 14:51:57 +0300 Subject: Separate tests and examples into individual packages Also make cli module to be explicitly enabled via the config.cli configuration variable. --- cli/cli/context.cxx | 2742 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2742 insertions(+) create mode 100644 cli/cli/context.cxx (limited to 'cli/cli/context.cxx') diff --git a/cli/cli/context.cxx b/cli/cli/context.cxx new file mode 100644 index 0000000..54bb988 --- /dev/null +++ b/cli/cli/context.cxx @@ -0,0 +1,2742 @@ +// file : cli/context.cxx +// author : Boris Kolpackov +// license : MIT; see accompanying LICENSE file + +#include +#include +#include // strncmp() +#include +#include +#include + +#include + +using namespace std; + +namespace +{ + char const* keywords[] = + { + "NULL", + "and", + "asm", + "auto", + "bitand", + "bitor", + "bool", + "break", + "case", + "catch", + "char", + "class", + "compl", + "const", + "const_cast", + "continue", + "default", + "delete", + "do", + "double", + "dynamic_cast", + "else", + "end_eq", + "enum", + "explicit", + "export", + "extern", + "false", + "float", + "for", + "friend", + "goto", + "if", + "inline", + "int", + "long", + "mutable", + "namespace", + "new", + "not", + "not_eq", + "operator", + "or", + "or_eq", + "private", + "protected", + "public", + "register", + "reinterpret_cast", + "return", + "short", + "signed", + "sizeof", + "static", + "static_cast", + "struct", + "switch", + "template", + "this", + "throw", + "true", + "try", + "typedef", + "typeid", + "typename", + "union", + "unsigned", + "using", + "virtual", + "void", + "volatile", + "wchar_t", + "while", + "xor", + "xor_eq" + }; +} + +context:: +context (ostream& os_, + output_type ot_, + semantics::cli_unit& unit_, + options_type const& ops) + : data_ (new (shared) data), + os (os_), + unit (unit_), + options (ops), + ot (ot_), + gen_modifier (options.generate_modifier ()), + gen_specifier (options.generate_specifier () || + options.generate_merge ()), + gen_parse (options.generate_parse ()), + gen_merge (options.generate_merge ()), + inl (data_->inl_), + opt_prefix (options.option_prefix ()), + opt_sep (options.option_separator ()), + cli (data_->cli_), + reserved_name_map (options.reserved_name ()), + keyword_set (data_->keyword_set_), + link_regex (data_->link_regex_), + id_set (data_->id_set_), + ref_set (data_->ref_set_), + heading_map (data_->heading_map_), + toc (data_->toc_), + tocs (data_->tocs_) +{ + if (options.suppress_usage ()) + gen_usage = ut_none; + else + { + if (options.long_usage ()) + gen_usage = options.short_usage () ? ut_both : ut_long; + else + gen_usage = ut_short; + } + + if (!options.suppress_inline ()) + data_->inl_ = "inline "; + + data_->cli_ = options.cli_namespace (); + + if (!cli.empty () && cli[0] != ':') + data_->cli_ = "::" + data_->cli_; + + for (size_t i (0); i < sizeof (keywords) / sizeof (char*); ++i) + data_->keyword_set_.insert (keywords[i]); + + // Link regex. + // + for (vector::const_iterator i (ops.link_regex ().begin ()); + i != ops.link_regex ().end (); ++i) + { + try + { + data_->link_regex_.push_back (regexsub (*i)); + } + catch (const regex_format& e) + { + cerr << "error: invalid regex '" << *i << "': " << e.what () << endl; + throw generation_failed (); + } + } + + toc = 0; +} + +context:: +context (context& c) + : data_ (c.data_), + os (c.os), + unit (c.unit), + options (c.options), + ot (c.ot), + gen_modifier (c.gen_modifier), + gen_specifier (c.gen_specifier), + gen_parse (c.gen_parse), + gen_merge (c.gen_merge), + gen_usage (c.gen_usage), + inl (c.inl), + opt_prefix (c.opt_prefix), + opt_sep (c.opt_sep), + cli (c.cli), + reserved_name_map (c.reserved_name_map), + keyword_set (c.keyword_set), + link_regex (c.link_regex), + id_set (c.id_set), + ref_set (c.ref_set), + heading_map (c.heading_map), + toc (c.toc), + tocs (c.tocs) +{ +} + +string context:: +escape (string const& name) const +{ + typedef string::size_type size; + + string r; + size n (name.size ()); + + // In most common cases we will have that many characters. + // + r.reserve (n); + + for (size i (0); i < n; ++i) + { + char c (name[i]); + + if (i == 0) + { + if (!((c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + c == '_')) + r = (c >= '0' && c <= '9') ? "cxx_" : "cxx"; + } + + if (!((c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || + c == '_')) + r += '_'; + else + r += c; + } + + if (r.empty ()) + r = "cxx"; + + // Custom reserved words. + // + reserved_name_map_type::const_iterator i (reserved_name_map.find (r)); + + if (i != reserved_name_map.end ()) + { + if (!i->second.empty ()) + return i->second; + else + r += L'_'; + } + + // Keywords + // + if (keyword_set.find (r) != keyword_set.end ()) + { + r += '_'; + + // Re-run custom words. + // + i = reserved_name_map.find (r); + + if (i != reserved_name_map.end ()) + { + if (!i->second.empty ()) + return i->second; + else + r += L'_'; + } + } + + return r; +} + +string context:: +process_link_target (const string& tg) +{ + bool t (options.link_regex_trace ()); + + if (t) + cerr << "link: '" << tg << "'" << endl; + + string r; + bool found (false); + + for (regex_mapping::const_iterator i (link_regex.begin ()); + i != link_regex.end (); ++i) + { + if (t) + cerr << "try: '" << i->regex () << "' : "; + + if (i->match (tg)) + { + r = i->replace (tg); + found = true; + + if (t) + cerr << "'" << r << "' : "; + } + + if (t) + cerr << (found ? '+' : '-') << endl; + + if (found) + break; + } + + return found ? r : tg; +} + + +string context:: +translate_arg (string const& s, std::set& set) +{ + string r; + r.reserve (s.size ()); + set.clear (); + + size_t p (string::npos); + + for (size_t i (0), n (s.size ()); i < n; ++i) + { + if (p == string::npos && s[i] == '<') + { + p = i; + r += "\\ci{"; + continue; + } + + if (p != string::npos && s[i] == '>') + { + set.insert (string (s, p + 1, i - p - 1)); + r += '}'; + p = string::npos; + continue; + } + + if (p != string::npos && s[i] == '}' && s[i - 1] != '\\') + { + r += "\\}"; + continue; + } + + r += s[i]; + } + + return r; +} + +string context:: +translate (string const& s, std::set const& set) +{ + string r; + r.reserve (s.size ()); + + bool pre (false); + size_t p (string::npos); + + for (size_t i (0), n (s.size ()); i < n; ++i) + { + char c (s[i]); + + // Skip pre-formatted fragments. + // + if (c == (pre ? 0x03 : 0x02)) + { + pre = !pre; + + if (p != string::npos) + { + assert (pre); + r.append (s, p, i - p); + p = string::npos; + } + } + else if (!pre) + { + if (p == string::npos && c == '<') + { + p = i; + continue; + } + + if (p != string::npos) + { + if (c == '>') + { + string a (s, p + 1, i - p - 1); + + if (set.find (a) != set.end ()) + { + r += "\\ci{"; + + for (size_t j (0), n (a.size ()); j < n; ++j) + { + if (a[j] == '}' && (j == 0 || a[j - 1] != '\\')) + r += "\\}"; + else + r += a[j]; + } + + r += '}'; + } + else + { + r += '<'; + r += a; + r += '>'; + } + p = string::npos; + } + continue; + } + } + + r += c; + } + + // If we found the opening '<' but no closing '>', add the rest. + // + if (p != string::npos) + r.append (s, p, string::npos); + + return r; +} + +void context:: +format_line (output_type ot, string& r, const char* l, size_t n) +{ + bool color (options.ansi_color ()); + + typedef unsigned char span; // Mask. + + const span code = 1; + const span itlc = 2; + const span bold = 4; + const span link = 8; + const span note = 16; + + vector spans; + + string link_target; + string link_section; // If not empty, man section; target is man name. + bool link_empty (false); // Link has no text. + + bool escape (false); + for (size_t i (0); i < n; ++i) + { + char c (l[i]); + + if (escape) + { + bool new_span (false); + + switch (c) + { + case ' ': + { + // Non-ignorable space. + // + switch (ot) + { + case ot_plain: + { + r += ' '; + break; + } + case ot_html: + { + r += " "; + break; + } + case ot_man: + { + r += "\\ "; + break; + } + } + + break; + } + case '-': + { + // N-dash. If someone wants m-dash, can use \-- with \-{}- as + // a way to "escape" an n-dash followed by hyphen. + // + switch (ot) + { + case ot_plain: + { + r += "--"; + break; + } + case ot_html: + { + r += "–"; + break; + } + case ot_man: + { + r += "\\(en"; + break; + } + } + + break; + } + case 'n': + { + switch (ot) + { + case ot_plain: + { + r += '\n'; + break; + } + case ot_html: + { + if (!r.empty () && r[r.size () - 1] != '\n') + r += '\n'; + + r += "
"; + break; + } + case ot_man: + { + if (!r.empty () && r[r.size () - 1] != '\n') + r += '\n'; + + // Note that if we have several consecutive breaks, they + // will be collapsed into a single one. No, .sp doesn't + // work (or, more exactly, will only work for two breaks). + // + r += ".br"; + break; + } + } + + // Skip following spaces. + // + for (; i + 1 < n && l[i + 1] == ' '; ++i) ; + + switch (ot) + { + case ot_plain: break; + case ot_html: + case ot_man: + { + if (i + 1 < n) // More text in this paragraph? + r += "\n"; + + break; + } + } + + break; + } + case 'c': + { + span s (code); + size_t j (i + 1); + + if (j < n) + { + if (l[j] == 'i') + { + s |= itlc; + j++; + + if (j < n && l[j] == 'b') + { + s |= bold; + j++; + } + } + else if (l[j] == 'b') + { + s |= bold; + j++; + + if (j < n && l[j] == 'i') + { + s |= itlc; + j++; + } + } + } + + if (j < n && l[j] == '{') + { + i = j; + spans.push_back (s); + new_span = true; + break; + } + + r += 'c'; + break; + } + case 'i': + { + span s (itlc); + size_t j (i + 1); + + if (j < n) + { + if (l[j] == 'c') + { + s |= code; + j++; + + if (j < n && l[j] == 'b') + { + s |= bold; + j++; + } + } + else if (l[j] == 'b') + { + s |= bold; + j++; + + if (j < n && l[j] == 'c') + { + s |= code; + j++; + } + } + } + + if (j < n && l[j] == '{') + { + i = j; + spans.push_back (s); + new_span = true; + break; + } + + r += 'i'; + break; + } + case 'b': + { + span s (bold); + size_t j (i + 1); + + if (j < n) + { + if (l[j] == 'c') + { + s |= code; + j++; + + if (j < n && l[j] == 'i') + { + s |= itlc; + j++; + } + } + else if (l[j] == 'i') + { + s |= itlc; + j++; + + if (j < n && l[j] == 'c') + { + s |= code; + j++; + } + } + } + + if (j < n && l[j] == '{') + { + i = j; + spans.push_back (s); + new_span = true; + break; + } + + r += 'b'; + break; + } + case 'l': + { + if (i + 1 < n && l[i + 1] == '{') + { + string& t (link_target); + + if (!t.empty ()) + { + cerr << "error: nested links in documentation paragraph '" + << string (l, 0, n) << "'" << endl; + throw generation_failed (); + } + + // Find the end of the link target. + // + size_t b (++i + 1), e (b); + for (; i + 1 < n; ++i) + { + char c (l[i + 1]); + + if (c == ' ' || c == '}') + { + e = i + 1; + + if (c == ' ') // Skip spaces. + for (++i; i + 1 < n && l[i + 1] == ' '; ++i) ; + + break; + } + } + + // Run the link target through format_line(ot_plain) to handle + // escaping (e.g., \\$). + // + format_line (ot_plain, t, l + b, e - b); + + if (t.empty ()) + { + cerr << "error: missing link target in documentation paragraph '" + << string (l, 0, n) << "'" << endl; + throw generation_failed (); + } + + // See if link_target is a man page name/section. + // + size_t o (t.find ('(')); + + if (o != string::npos) + { + link_section.assign (t, o + 1, t.size () - o - 2); + + if (t[t.size () - 1] != ')' || link_section.empty ()) + { + cerr << "error: missing man section in '" << t << "'" << endl; + throw generation_failed (); + } + + string n (t, 0, o); + + if (n.empty ()) + { + cerr << "error: missing man page in '" << t << "'" << endl; + throw generation_failed (); + } + + t = n; + } + else + link_section.clear (); + + // If this is a local fragment reference, add it to the set to be + // verified at the end. + // + if (t[0] == '#') + { + assert (link_section.empty ()); // Not a man page. + ref_set.insert (string (t, 1, string::npos)); + } + + link_empty = i + 1 < n && l[i + 1] == '}'; + + spans.push_back (link); + new_span = true; + break; + } + + r += 'l'; + break; + } + case 'N': + { + if (i + 1 < n && l[i + 1] == '{') + { + ++i; + spans.push_back (note); + new_span = true; + break; + } + + r += 'N'; + break; + } + case '\\': + { + switch (ot) + { + case ot_man: + { + r += "\\e"; + break; + } + default: + { + r += '\\'; + break; + } + } + break; + } + case '"': + { + r += '"'; + break; + } + case '\'': + { + r += '\''; + break; + } + case '}': + { + r += '}'; + break; + } + case '|': + { + r += '|'; + break; + } + default: + { + cerr << "error: unknown escape sequence '\\" << c << "' in " + << "documentation paragraph '" << string (l, 0, n) << "'" + << endl; + throw generation_failed (); + } + } + + // If we just added a new span, add corresponding output markup. + // + if (new_span) + { + span s (spans.back ()); + + span es (0); // Effective span. + for (vector::iterator i (spans.begin ()); + i != spans.end (); + ++i) + es |= *i & (code | itlc | bold); + + switch (ot) + { + case ot_plain: + { + if ((s & note) != 0) + { + r += "[Note: "; + } + else if ((s & link) == 0) + { + if (color) + { + if (s & bold) + r += "\033[1m"; + + if (s & itlc) + r += "\033[4m"; + } + } + + break; + } + case ot_html: + { + if (s & note) + { + r += ""; + } + else if (s & link) + { + r += ""; + } + else + { + if (s & code) + r += ""; + + if (s & itlc) + r += ""; + + if (s & bold) + r += ""; + } + + break; + } + case ot_man: + { + if ((s & note) != 0) + { + cerr << "error: \\N{} in man output not yet supported" << endl; + throw generation_failed (); + } + + if ((s & link) == 0) + { + if ((es & itlc) && (es & bold)) + r += "\\f(BI"; + else if (es & itlc) + r += "\\fI"; + else if (es & bold) + r += "\\fB"; + } + + break; + } + } + } + + escape = false; + } + else // Not escape. + { + switch (c) + { + case '\\': + { + escape = true; + break; + } + case '.': + { + if (ot == ot_man) + r += "\\."; + else + r += '.'; + break; + } + case '}': + { + if (!spans.empty ()) + { + span s (spans.back ()); + spans.pop_back (); + + span es (0); // New effective span. + for (vector::iterator i (spans.begin ()); + i != spans.end (); + ++i) + es |= *i & (code | itlc | bold); + + switch (ot) + { + case ot_plain: + { + if ((s & note) != 0) + { + r += ']'; + } + else if ((s & link) != 0) + { + string t (link_section.empty () + ? link_target + : link_target + "(" + link_section + ")"); + + string pt (process_link_target (t)); + + if (pt.empty ()) + { + if (link_empty) + { + cerr << "error: link target '" << t << "' became empty " + << "and link text is also empty" << endl; + throw generation_failed (); + } + } + else + { + if (!link_empty) + r += " ("; + + if (link_section.empty ()) + r += pt; + else + { + if (color) + r += "\033[1m"; + + r += pt; + + if (color) + r += "\033[0m"; + } + + if (!link_empty) + r += ")"; + } + } + else + { + if (color) + { + // While there are codes to turn off bold (22) and + // underline (24), it is not clear how widely they + // are supported. + // + r += "\033[0m"; // Clear all. + + if (es & bold) + r += "\033[1m"; + + if (es & itlc) + r += "\033[4m"; + } + } + + break; + } + case ot_html: + { + if ((s & note) != 0) + { + r += ""; + } + else if ((s & link) != 0) + { + if (link_empty) + { + if (link_section.empty ()) + r += link_target; + else + { + r += ""; + r += link_target + "(" + link_section + ")"; + r += ""; + } + } + + r += ""; + } + else + { + if (s & bold) + r += ""; + + if (s & itlc) + r += ""; + + if (s & code) + r += ""; + } + + break; + } + case ot_man: + { + assert ((s & note) == 0); + + if ((s & link) != 0) + { + string t (link_section.empty () + ? link_target + : link_target + "(" + link_section + ")"); + + string pt (process_link_target (t)); + + if (pt.empty ()) + { + if (link_empty) + { + cerr << "error: link target '" << t << "' became empty " + << "and link text is also empty" << endl; + throw generation_failed (); + } + } + else + { + if (!link_empty) + r += " ("; + + if (link_section.empty ()) + r += pt; + else + r += "\\fB" + pt + "\\fP"; + + if (!link_empty) + r += ")"; + } + } + else + { + // At first sight, \fP (select previous font) looks like + // exactly what we need here. However, it doesn't quite + // have the stack semantics that we need. + // + if ((es & itlc) && (es & bold)) + r += "\\f(BI"; + else if (es & itlc) + r += "\\fI"; + else if (es & bold) + r += "\\fB"; + else + r += "\\fR"; + } + + break; + } + } + + if (s & link) + link_target.clear (); + + break; + } + } + // Fall through. + default: + r += c; + break; + } + } + } + + if (escape) + { + cerr << "error: unterminated escape sequence in documentation " + << "paragraph '" << string (l, 0, n) << "'" << endl; + throw generation_failed (); + } + + if (!spans.empty ()) + { + unsigned char b (spans.back ()); + string bs; + + if (b & code) bs += 'c'; + if (b & itlc) bs += 'i'; + if (b & bold) bs += 'b'; + if (b & link) bs = 'l'; + if (b & note) bs = 'N'; + + + cerr << "error: unterminated formatting span '\\" << bs << "' " + << "in documentation paragraph '" << string (l, 0, n) << "'" << endl; + throw generation_failed (); + } +} + +struct block +{ + // The semantic meaning of \hN and their mapping to HTML is as follows: + // + // \h0 - preface

+ // \H - part

+ // \h1 - chapter

+ // \h - section

or

+ // \h2 - sub-section

+ // + // In HTML, if \h0 or \h1 was seen, then \h is automatically mappend to + //

. Otherwise it is

. + // + // The specifier (0, H, 1, h, 2) is stored in header. + // + enum kind_type {h, ul, ol, dl, li, note, text, pre}; + + kind_type kind; + bool para; // True if first text fragment should be in own paragraph. + + string id; + string header; + string value; + string trailer; + + block (kind_type k, bool p, const string& i, const string& h = "") + : kind (k), para (p), id (i), header (h) {} +}; + +static const char* block_kind_str[] = { + "\\h", "\\ul", "\\ol", "\\dl", "\\li", "\\N", "text", "preformatted text"}; + +inline ostream& +operator<< (ostream& os, block::kind_type k) +{ + return os << block_kind_str[k]; +} + +// Given the contents of a block element, convert any leading/trailing
+// elements to top/bottom margins. While conceptually it would seem better to +// translate them to padding, margins actually give better practical results +// since they overlap with container's margins. +// +static string +html_margin (string& v) +{ + size_t top (0), bot (0); + + const char* b (v.c_str ()); + const char* e (v.c_str () + v.size ()); + + for (; e - b >= 5 && strncmp (b, "
", 5) == 0; ++top) + { + b += 5; + + if (b != e && *b == '\n') // Remove following newline, if any. + ++b; + } + + for (; e - b >= 5 && strncmp (e - 5, "
", 5) == 0; ++bot) + { + e -= 5; + + if (e != b && *(e - 1) == '\n') // Remove preceding newline, if any. + --e; + } + + if (top == 0 && bot == 0) + return ""; + + string t; + t.swap (v); + v.assign (b, e - b); + + ostringstream os; + + if (top != 0) + os << "margin-top:" << top << "em"; + + if (bot != 0) + os << (top != 0 ? ";" : "") << "margin-bottom:" << bot << "em"; + + return os.str (); +} + +// The same idea except there are no margins. So we just strip .br. +// +static void +man_margin (string& v) +{ + size_t top (0), bot (0); + + const char* b (v.c_str ()); + const char* e (v.c_str () + v.size ()); + + for (; e - b >= 3 && strncmp (b, ".br", 3) == 0; ++top) + { + b += 3; + + if (b != e && *b == '\n') // Remove following newline, if any. + ++b; + } + + for (; e - b >= 3 && strncmp (e - 3, ".br", 3) == 0; ++bot) + { + e -= 3; + + if (e != b && *(e - 1) == '\n') // Remove preceding newline, if any. + --e; + } + + if (top != 0 || bot != 0) + { + string t; + t.swap (v); + v.assign (b, e - b); + } +} + +string context:: +format (semantics::scope& scope, string const& s, bool para) +{ + stack blocks; + blocks.push (block (block::text, para, "")); // Top-level. + + // Number of li in ol. Since we don't support nested lists, we don't + // need to push it into the stack. + // + size_t ol_count (0); + + // Mapping of \h to HTML tag. By default it is

until we encounter + // \h0 or \h1 at which point we change it to

. + // + char html_h ('1'); + + bool last (false); + for (size_t b (0), e; !last; b = e + 1) + { + bool pre (s[b] == 0x02); + + const char* l; + size_t n; + string subst; // Variable-substituted. + + if (pre) + { + ++b; // Skip 0x02. + + e = s.find (0x03, b); + assert (e != string::npos); + + l = s.c_str () + b; + n = e - b; + + ++e; // Skip newline that follows 0x03. + last = (e == s.size ()); + } + else + { + e = s.find ('\n', b); + last = (e == string::npos); + + l = s.c_str () + b; + n = (last ? s.size () : e) - b; + + // Perform variable expansions (\$var$). Also detect the switch + // to/from TOC mode. + // + unsigned short t (toc); + if (substitute (scope, l, n, subst)) + { + if (subst.empty ()) + continue; + + if (t != toc) + { + // This is a TOC prologue/epilogue (returned by start/end_toc()) and + // we shouldn't be formatting it. + // + assert (blocks.size () == 1); + string& v (blocks.top ().value); + + // Require that \$TOC$ appears in its own doc string. Failed this + // it is tricky to get indentation right. + // + if (!v.empty () || !last) + { + cerr << "error: TOC variable should be in its own documentation " + << "string" << endl; + throw generation_failed (); + } + + switch (ot) + { + case ot_plain: break; + case ot_html: + { + // Different "newline protocol" inside TOC. + // + v += subst; + + if (toc) + v += '\n'; + } + case ot_man: break; + } + + continue; + } + + l = subst.c_str (); + n = subst.size (); + } + } + + // If this is TOC phase 2, then we simply ignore everything. + // + if (toc == 2) + continue; + + const char* ol (l); // Original, full line for diagnostics. + size_t on (n); + + // First determine what kind of paragraph block this is. + // + block::kind_type k (block::h); + string id; + string header; + string trailer; + + size_t pop (0); // Pop count. + + if (pre) + { + k = block::pre; + } + else + { + // \h \H + // + if (n >= 3 && + l[0] == '\\' && + (l[1] == 'h' || l[1] == 'H') && + (l[2] == '|' || l[2] == '#')) + { + k = block::h; + header = l[1]; + l += 3; + n -= 3; + } + // + // \h0 \h1 \h2 + // + else if (n >= 4 && + l[0] == '\\' && + l[1] == 'h' && + (l[3] == '|' || l[3] == '#')) + { + if (l[2] != '0' && l[2] != '1' && l[2] != '2') + { + cerr << "error: '0', '1', or '2' expected in \\hN| in '" + << string (ol, 0, on) << "'" << endl; + throw generation_failed (); + } + + k = block::h; + header = l[2]; + l += 4; + n -= 4; + } + // + // \ul \ol \dl + // + else if (n >= 4 && + l[0] == '\\' && + (l[1] == 'u' || l[1] == 'o' || l[1] == 'd') && + l[2] == 'l' && + (l[3] == '|' || l[3] == '#')) + { + switch (l[1]) + { + case 'u': k = block::ul; break; + case 'o': k = block::ol; break; + case 'd': k = block::dl; break; + } + + l += 4; + n -= 4; + } + // + // \li + // + else if (n >= 4 && + l[0] == '\\' && + l[1] == 'l' && + l[2] == 'i' && + (l[3] == '|' || l[3] == '#')) + { + k = block::li; + l += 4; + n -= 4; + } + // + // \N (note) + // + else if (n >= 3 && + l[0] == '\\' && + l[1] == 'N' && + (l[2] == '|' || l[2] == '#')) + { + k = block::note; + l += 3; + n -= 3; + } + else + k = block::text; + + // Get the id, if present. + // + if (k != block::text && *(l - 1) == '#') + { + for (; n != 0 && *l != '|'; ++l, --n) + id += *l; + + if (n == 0) + { + cerr << "error: paragraph begin '|' expected after id in '" + << string (ol, 0, on) << "'" << endl; + throw generation_failed (); + } + + ++l; --n; // Skip '|'. + } + + // Skip leading spaces after opening '|'. + // + if (k != block::text) + while (n != 0 && (*l == 0x20 || *l == 0x0D || *l == 0x09)) {++l; --n;} + + // Next figure out how many blocks we need to pop at the end of this + // paragraph. Things get a bit complicated since '|' could be escaped. + // + for (; n - pop > 0 && l[n - pop - 1] == '|'; ++pop) ; + if (pop != 0) + { + // To determine whether the first '|' is part of an escape sequence + // we have to find the first non-backslash character and then figure + // out who escapes whom. + // + size_t ec (0); // Escape count. + for (; n - pop - ec > 0 && l[n - pop - ec - 1] == '\\'; ++ec) ; + + // If we have an odd number of backslashes, then the last '|' is + // escaped. + // + if (ec % 2 != 0) + --pop; + + n -= pop; // Number of special '|' at the end. + + // Skip trailing spaces before closing '|'. + // + while (n != 0 && (l[n - 1] == 0x20 || + l[n - 1] == 0x0D || + l[n - 1] == 0x09)) n--; + } + } + + // Outer block kind. + // + block::kind_type ok (blocks.top ().kind); + + // Verify that this block type is valid in this context. Ignore + // empty text blocks (can happen if we just have '|'). + // + if (k != block::text || n != 0) + { + bool good (true); + + switch (ok) + { + case block::h: good = false; break; + case block::ul: + case block::ol: + case block::dl: good = (k == block::li); break; + case block::li: good = (k == block::note || + k == block::text || + k == block::pre ); break; + case block::note: good = (k == block::text || + k == block::pre || + (ot == ot_html && (k == block::ul || + k == block::ol || + k == block::dl))); break; + case block::text: good = (k != block::li); break; + case block::pre: assert (false); + } + + if (!good) + { + cerr << "error: " << k << " inside " << ok << " " + << "in documentation string '" << s << "'" << endl; + throw generation_failed (); + } + } + + // Check id for duplicates. Do it only on the non-TOC pass. + // + if (!toc && !id.empty ()) + { + if (!id_set.insert (id).second) + { + cerr << "error: duplicate id '" << id << "' in documentation " + << "string '" << s << "'" << endl; + throw generation_failed (); + } + } + + // Verify the block itself. + // + switch (k) + { + case block::h: + { + // \h blocks are only valid if we are required to start a new + // paragraph (first_para is true). + // + if (!para) + { + cerr << "error: paragraph '" << string (ol, 0, on) << "' " + << "not allowed in '" << s << "'" << endl; + throw generation_failed (); + } + + // \h must be single-paragraph. + // + if (pop == 0) + { + cerr << "error: '|' expected at the end of paragraph '" + << string (ol, 0, on) << "'" << endl; + throw generation_failed (); + } + + // \h must not be empty. + // + if (n == 0) + { + cerr << "error: empty paragraph '" << string (ol, 0, on) << "' " + << "in documentation string '" << s << "'" << endl; + throw generation_failed (); + } + + break; + } + case block::ul: + case block::ol: + case block::dl: + { + if (pop != 0) + { + cerr << "error: empty list '" << string (ol, 0, on) << "' " + << "in documentation string '" << s << "'" << endl; + throw generation_failed (); + } + + if (n != 0) + { + cerr << "error: unexpected text after " << k << "| " + << "in paragraph '" << string (ol, 0, on) << "'" << endl; + throw generation_failed (); + } + + break; + } + case block::li: + { + if (ok == block::dl) + { + if (n == 0) + { + cerr << "error: term text missing in paragraph '" + << string (ol, 0, on) << "'" << endl; + throw generation_failed (); + } + } + + break; + } + case block::note: + case block::text: + case block::pre: + break; + } + + // Push the block into the stack. + // + switch (k) + { + case block::h: blocks.push (block (k, false, id, header)); break; + case block::ul: + case block::ol: ol_count = 0; // Fall through. + case block::dl: blocks.push (block (k, true, id)); break; + case block::li: + { + switch (blocks.top ().kind) + { + case block::ol: + { + ostringstream os; + os << ++ol_count; + header = os.str (); + break; + } + case block::dl: + { + format_line (ot, header, l, n); + n = 0; + break; + } + default: + break; + } + + blocks.push (block (k, false, id, header)); + break; + } + case block::note: + { + blocks.push (block (k, true, id, header)); + break; + } + case block::text: break; // No push. + case block::pre: break; // No push. + } + + // Output paragraph text. If we are in TOC mode and this is a top-level + // block, then pretend we don't have anything to write. For non-top-level + // blocks, we handle this below, when we pop them. + // + if (toc && blocks.size () == 1) + n = 0; + + if (n != 0) + { + block& b (blocks.top ()); + string& v (b.value); + bool first (v.empty ()); + + switch (ot) + { + case ot_plain: + { + // Separate paragraphs with a blank line. + // + if (!first) + v += "\n\n"; + + if (k == block::pre) + v.append (l, n); + else + { + /* + // Strip a single "\n" at the beginning of the paragraph since we + // already wrote one newline as part of starting the paragraph. An + // example where this might be useful: + // + // li| + // + // \ + // ... + // \ + // + // \n| + // + if (n >= 2 && l[0] == '\\' && l[1] == 'n') + { + l += 2; + n -= 2; + } + + if (n != 0) + */ + + format_line (ot, v, l, n); + } + + break; + } + case ot_html: + { + // Separate paragraphs with a blank line. + // + if (!first) + v += "\n\n"; + + if (k == block::pre) + { + v += "
";
+            v.append (l, n);
+            v += "
"; + } + else + { + if (!first || b.para) + { + string t; + format_line (ot, t, l, n); + string s (html_margin (t)); + + // If the entire paragraph is wrapped in then signal this + // fact via the "code" class. This can be used, for example, to + // change alignment from "justify" to "left" in order to support + // custom alignments (in combination with non-ignorable space + // escaping; think multi-line synopsis in man pages). + // + // While at it also cleanup nested fragments (we + // may end up with quite a few of them due to the + // translation). + // + string c; + { + size_t n (t.size ()); + + if (n >= 13 && + t.compare (0, 6, "") == 0 && + t.compare (n - 7, 7, "") == 0) + { + string tt; + + // Make sure this is one contigous . + // + size_t b (1); // / balance. + for (size_t i (6), j (0); + i != n && (i = t.find ("code>", i)) != string::npos; + j = i = i + 5) + { + if (t[i - 1] == '<') b++; + else if (t[i - 1] == '/' && t[i - 2] == '<') b--; + else continue; + + // Append previous chunk. + // + tt.append (t, j, i - j - (t[i - 1] == '<' ? 1 : 2)); + + if (b == 0) + { + if (i + 5 == n) + { + tt.append (t, i - 2, 7); // Closing . + t = move (tt); + c = "code"; + } + + break; + } + } + } + } + + v += "= blocks.size ()) // >= to account for top-level. + { + cerr << "error: extraneous '|' at the end of paragraph '" + << string (ol, 0, on) << "'" << endl; + throw generation_failed (); + } + + for (; pop != 0; --pop) + { + block pb (blocks.top ()); // move + string& pi (pb.id); + string& ph (pb.header); + string& pv (pb.value); + + blocks.pop (); + + block& b (blocks.top ()); + string& v (b.value); + + // Handle poping into top-level block in TOC mode. + // + if (toc && blocks.size () == 1) + { + if (pb.kind == block::h) + { + switch (ot) + { + case ot_plain: break; + case ot_html: + { + char t (ph[0]); + + if (pi.empty ()) + { + cerr << "error: TOC heading '" << pv << "' has no id" << endl; + throw generation_failed (); + } + + pv = "" + pv + ""; + + // Unwind heading levels that are deeper ("more sub") than us. + // + for (; tocs.size () != 1; tocs.pop_back ()) + { + toc_entry const& e (tocs.back ()); + + bool pop (true); + + switch (t) + { + case '0': + case 'H': + case '1': break; // Always unwind. + case 'h': + { + pop = html_h == '1' || e.type == 'h' || e.type == '2'; + break; + } + case '2': pop = e.type == '2'; break; + } + + if (!pop) + break; + + // If we have sub-headings, then we need to close the table. + // + if (e.count != 0) + { + size_t l (tocs.size ()); + + v += string (l * 4 - 2, ' ') + "\n"; + v += string (l * 4 - 4, ' ') + "\n"; + } + else + // Otherwise it is inline. + // + v += "\n"; + } + + size_t l (tocs.size ()); + toc_entry& e (tocs.back ()); + + // If this is a first sub-entry, then we need to open the + // sub-table. + // + string in ((l * 4) - 2, ' '); + + if (l > 1 && e.count == 0) + { + v += "\n"; + v += in + "\n"; + } + + in += " "; + + switch (t) + { + case 'H': + { + v += in + "\n"; + break; + } + case '0': + case '1': + case 'h': + case '2': + { + // Calculate Chapter(X)/Section(X.Y)/Subsection(X.Y.Z) unless + // it is the preface (or a subsection thereof). + // + string n; + if (t != '0') + { + ++e.count; + + for (toc_stack::const_iterator i (tocs.begin ()); + i != tocs.end (); + ++i) + { + if (i->type == '0') + { + n.clear (); + break; + } + + if (!n.empty ()) + n += '.'; + + ostringstream os; + os << i->count; + n += os.str (); + } + } + + v += in + ""; + + if (!n.empty ()) + { + v += "\n"; + } + else + // Otherwise it is inline. + // + v += "\n"; + } + + v += "
" + pv + "
" + n + ""; + heading_map[pi] = n; // Save it for later. + } + else + v += ""; + + v += pv; // No newline + tocs.push_back (toc_entry (t)); + break; + } + } + + // Same as in non-TOC mode below. + // + // @@ This only works for a single string fragment. + // + if (t == '0' || t == '1') + html_h = '2'; + + break; + } + case ot_man: break; + } + } + + continue; + } + + switch (ot) + { + case ot_plain: + { + const char* sn (v.empty () ? "" : "\n"); + const char* dn (v.empty () ? "" : "\n\n"); + + switch (pb.kind) + { + case block::h: + { + v += dn; + + if (options.ansi_color ()) + v += "\033[1m"; // Bold. + + v += pv; + + if (options.ansi_color ()) + v += "\033[0m"; + + break; + } + case block::ul: + case block::ol: + case block::dl: v += dn + pv; break; + case block::li: + { + v += sn; + size_t ind (0), vn (pv.size ()); + + switch (b.kind) + { + case block::ul: + v += "* "; + ind = 2; + break; + case block::ol: + v += ph + ". "; + ind = ph.size () + 2; + break; + case block::dl: + v += ph + "\n" + (vn != 0 && pv[0] != '\n' ? " " : ""); + ind = 4; + break; + default: + break; + } + + // Add value with indentation for subsequent paragraphs. + // + char c, p ('\0'); // Current and previous characters. + for (size_t i (0); i != vn; p = c, ++i) + { + c = pv[i]; + + if (p == '\n' && c != '\n') // Don't indent blanks. + v += string (ind, ' '); + + v += c; + } + + break; + } + case block::note: + { + v += dn; + + // Add a special character (bell) plus space in front of every + // line (including blanks). The bell is recognized and + // translated by txt_wrap_lines() to '|'. + // + char p ('\n'); // Previous character. + for (size_t i (0), n (pv.size ()); i != n; ++i) + { + char c (pv[i]); + + if (p == '\n') + v += (c != '\n' ? "\x07 " : "\x07"); + + v += c; + p = c; + } + + break; + } + case block::text: + case block::pre: assert (false); + } + + break; + } + case ot_html: + { + if (!v.empty ()) + v += "\n\n"; + + switch (pb.kind) + { + case block::h: + { + char t (ph[0]); + + string h; + string c; + + typedef map map; + const map& hm (options.html_heading_map ()); + map::const_iterator mi (hm.find (t)); + + if (mi == hm.end ()) + { + switch (t) + { + case '0': h = "h1"; c = "preface"; break; + case 'H': h = "h1"; c = "part"; break; + case '1': h = "h1"; break; + case 'h': h = html_h == '1' ? "h1" : "h2"; break; + case '2': h = "h3"; break; + } + } + else + h = mi->second; + + v += '<' + h; + if (!pi.empty ()) v += " id=\"" + pi + '"'; + if (!c.empty ()) v += " class=\"" + c + '"'; + v += '>'; + + // See if we have a heading number (assigned by TOC). + // + if (!pi.empty ()) + { + heading_map_type::const_iterator i (heading_map.find (pi)); + + if (i != heading_map.end ()) + v += i->second + ' '; + } + + v += pv; + + v += "'; + + // @@ This only works for a single string fragment. + // + if (t == '0' || t == '1') + html_h = '2'; + + break; + } + case block::ul: v += "
    \n" + pv + "\n
"; break; + case block::ol: v += "
    \n" + pv + "\n
"; break; + case block::dl: v += "
\n" + pv + "\n
"; break; + case block::li: + { + string pvs (html_margin (pv)); + + if (b.kind == block::dl) + { + string phs (html_margin (ph)); + + v += (phs.empty () ? "
" : "
") + + ph + "
\n"; + + v += (pvs.empty () ? "
" : "
") + + pv + "
"; + } + else + v += (pvs.empty () ? "
  • " : "
  • ") + + pv + "
  • "; + + break; + } + case block::note: + { + v += "
    \n"; + v += pv; + v += "\n
    "; + + break; + } + case block::text: + case block::pre: assert (false); + } + + break; + } + case ot_man: + { + // Seeing that we always write a macro, one newline is enough. + // + if (!v.empty ()) + v += "\n"; + + switch (pb.kind) + { + case block::h: v += ".SH \"" + pv + "\""; break; + case block::ul: + case block::ol: + case block::dl: + { + if (!b.para) // First list inside .IP. + { + // .IP within .IP? Just shoot me in the head already! We + // have to manually indent it with .RS/.RE *and* everything + // that comes after it (since .PP resets the indent). Why + // not just indent the whole list content? Because then the + // first line will never start on the same line as the term. + // + v += ".RS\n"; + b.trailer = "\n.RE"; + b.para = true; // Start emitting .PP from now on. + } + + v += pv; + break; + } + case block::li: + { + man_margin (ph); // Strip leading/trailing .br. + + switch (b.kind) + { + case block::ul: v += ".IP \\(bu 2em\n" + pv; break; + case block::ol: v += ".IP " + ph + ". 4em\n" + pv; break; + // + // Add .br to force the definition to start on the new line. + // + case block::dl: v += ".IP \"" + ph + "\"\n.br\n" + pv; break; + default: break; + } + + break; + } + case block::note: + { + cerr << "error: " << pb.kind << "| in man output not yet " + << "supported" << endl; + throw generation_failed (); + } + case block::text: + case block::pre: assert (false); + } + + break; + } + } + } + } + + assert (!blocks.empty ()); // Should have top-level. + + if (blocks.size () > 1) + { + cerr << "error: unterminated paragraph " << blocks.top ().kind << " " + << "in documentation string '" << s << "'" << endl; + throw generation_failed (); + } + + block& b (blocks.top ()); + + switch (ot) + { + case ot_man: return b.value + b.trailer; + default: return b.value; + } +} + +string context:: +start_toc () +{ + switch (ot) + { + case ot_plain: break; + case ot_html: + { + tocs.push_back (toc_entry ('\0')); + return " "; + } + case ot_man: break; + } + + return string (); +} + +string context:: +end_toc () +{ + switch (ot) + { + case ot_plain: break; + case ot_html: + { + string v; + + // Unwind the TOC stack until we reach top level. Same code as in + // format(). + // + for (; tocs.size () != 1; tocs.pop_back ()) + { + toc_entry const& e (tocs.back ()); + + // If we have sub-headings, then we need to close the table. + // + if (e.count != 0) + { + size_t l (tocs.size ()); + + v += string (l * 4 - 2, ' ') + "
    \n"; + v += string (l * 4 - 4, ' ') + "
    "; + return v; + } + case ot_man: break; + } + + return string (); +} + +void context:: +verify_id_ref () +{ + if (!options.omit_link_check ()) + { + bool f (false); + + for (id_set_type::const_iterator i (ref_set.begin ()); + i != ref_set.end (); + ++i) + { + if (id_set.find (*i) == id_set.end ()) + { + cerr << "error: no id for fragment link '#" << *i << "'" << endl; + f = true; + } + } + + if (f) + throw generation_failed (); + } + + id_set.clear (); + ref_set.clear (); + heading_map.clear (); +} + +string context:: +substitute (const string& s, const path* d) +{ + string r; + + // Scan the string looking for variables ($var$) or paths. + // + size_t b (0), e (b); + for (size_t n (s.size ()); e != n; ++e) + { + if (s[e] == '$' && e + 1 != n) + { + if (s[e + 1] == '$') // Escape. + { + r.append (s, b, ++e - b); // Keep one, skip the other. + b = e + 1; + continue; + } + + bool file (false); + + // Scan for as long as it is a C identifier or a file. + // + size_t p (e + 1); // Position of the second '$'. + for (; p != n; ++p) + { + char c (s[p]); + bool f (p == e + 1); // First. + + if (d != 0 && f && c == '.' && p + 1 != n && + (s[p + 1] == '/' || // "./" + (s[p + 1] == '.' && p + 2 != n && s[p + 2] == '/'))) // "../" + file = true; + + if (file) + { + if (c == '$') + break; + } + else + { + if (!(c == '_' || + ('a' <= c && c <= 'z') || + ('A' <= c && c <= 'Z') || + (!f && '0' <= c && c <= '9'))) + break; + } + } + + // Note: check that the second '$' is not escaped. + // + if (p != n && s[p] == '$' && (p + 1 == n || s[p + 1] != '$')) + { + r.append (s, b, e - b); // Save what came before the expansion. + + // Var/file name. + // + ++e; + string v (s, e, p - e); + + if (file) + { + path p (*d / path (v)); + p.normalize (); // Apply '.' and '..'. + + ifstream ifs (p.string ().c_str (), ifstream::in | ifstream::binary); + if (!ifs.is_open ()) + { + cerr << p << ": error: unable to open in read mode" << endl; + throw generation_failed (); + } + + // We don't expect our file to contain '\0' so use that as the + // delimiter to read the entire file with getline(). + // + string s; + getline (ifs, s, '\0'); + r += s; + } + else + { + // Handle special variables. + // + if (v == "TOC") + { + if (!r.empty () || p + 1 != n) + { + cerr << "error: TOC variable should be on its own line" << endl; + throw generation_failed (); + } + + // Invert the TOC mode. + // + if ((toc = toc ? 0 : 1)) + r = start_toc (); + else + r = end_toc (); + + return r; + } + + // Lookup and substiute the variable. + // + using semantics::doc; + + if (doc* d = unit.lookup ("", "var: " + v)) + r += d->front (); + else + { + cerr << "error: undefined variable '" << v << "' in '" << s << "'" + << endl; + throw generation_failed (); + } + } + + e = p; + b = e + 1; + } + } + } + + r.append (s, b, e - b); // Last chunk. + return r; +} + +bool context:: +substitute (semantics::scope& scope, const char* s, size_t n, string& result) +{ + bool sub (false); + result.clear (); + + // Scan the string looking for variables (\$var$). + // + size_t b (0), e (b); + for (char p ('\0'); e != n; ++e) + { + char c (s[e]); + + if (p == '\\') + { + if (c == '\\') // Escape sequence. + { + p = '\0'; + continue; + } + + if (c == '$') + { + // Variable expansion. + // + sub = true; + + // Find the closing '$'. + // + size_t p (e + 1); // Position of the second '$'. + for (; p != n && s[p] != '$'; ++p) ; + + if (p == n) + { + cerr << "error: missing closing '$' in '" << string (s, n) << "'" + << endl; + throw generation_failed (); + } + + result.append (s, b, e - b - 1); // Save what came before. + + // Var name. + // + ++e; + string v (s, e, p - e); + + // Handle special variables. + // + if (v == "TOC") + { + if (!result.empty () || p + 1 != n) + { + cerr << "error: TOC variable should be its own paragraph" << endl; + throw generation_failed (); + } + + // Invert the TOC mode. + // + if ((toc = toc ? 0 : 1)) + result = start_toc (); + else + result = end_toc (); + + return true; + } + + // Lookup and substiute. + // + using semantics::doc; + + if (doc* d = unit.lookup (scope.fq_name (), "var: " + v)) + result += d->front (); + else + { + cerr << "error: undefined variable '" << v << "' in '" + << string (s, n) << "'" << endl; + throw generation_failed (); + } + + e = p; + b = e + 1; + } + } + + p = s[e]; + } + + if (sub) + result.append (s, b, e - b); // Last chunk. + + return sub; +} + +string context:: +fq_name (semantics::nameable& n, bool cxx_name) +{ + using namespace semantics; + + string r; + + if (dynamic_cast (&n)) + { + return ""; // Map to global namespace. + } + else + { + r = fq_name (n.scope ()); + r += "::"; + r += cxx_name ? escape (n.name ()) : n.name (); + } + + return r; +} + +string context:: +ns_open (const string& qn, bool last) +{ + string::size_type b (0), e; + + do + { + e = qn.find ("::", b); + string n (qn, b, e == string::npos ? e : e - b); + + if (!n.empty () && (last || e != string::npos)) + os << "namespace " << n << "{"; + + b = e; + + if (b == string::npos) + return n; + + b += 2; + + } while (true); +} + +void context:: +ns_close (const string& qn, bool last) +{ + string::size_type b (0), e; + + do + { + e = qn.find ("::", b); + string n (qn, b, e == string::npos ? e : e - b); + + if (!n.empty () && (last || e != string::npos)) + os << "}"; + + b = e; + + if (b == string::npos) + return; + + b += 2; + + } while (true); +} + +class_doc context:: +class_doc (semantics::class_& c) +{ + typedef map map; + const map& m (options.class_doc ()); + + string n (c.fq_name ()); + + map::const_iterator i (m.find (n)); + + if (i == m.end ()) + { + n = string (n, 2, string::npos); // Try without leading '::'. + i = m.find (n); + } + + if (i == m.end ()) + return cd_default; + + const string& k (i->second); + + if (k == "exclude") + return cd_exclude; + if (k == "exclude-base") + return cd_exclude_base; + else if (k == "short") + return cd_short; + else if (k == "long") + return cd_long; + else + { + cerr << "error: unknown --class-doc kind value '" << k << "'" << endl; + throw generation_failed (); + } +} + +string context:: +first_sentence (string const& s) +{ + size_t p (s.find ('.')); + + // Add some heuristics here: check that there is a space + // (or end of text) after the period. + // + while (p != string::npos && + p + 1 <= s.size () && + s[p + 1] != ' ' && + s[p + 1] != '\n') + p = s.find ('.', p + 1); + + return p == string::npos ? s : string (s, 0, p + 1); +} + +// namespace +// + +void namespace_:: +pre (type& ns) +{ + string name (ns.name ()); + + if (!name.empty ()) + os << "namespace " << escape (name) + << "{"; +} + +void namespace_:: +post (type& ns) +{ + if (!ns.name ().empty ()) + os << "}"; +} -- cgit v1.1