From 83996ed8d7178750a0b96125b209cc97ce5ddba0 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Thu, 12 Nov 2015 11:12:52 +0200 Subject: New formatter implementation --- cli/context.cxx | 469 +++++++++++++++++++++++++++++++++++++++++++++++++++----- cli/html.cxx | 3 +- 2 files changed, 431 insertions(+), 41 deletions(-) (limited to 'cli') diff --git a/cli/context.cxx b/cli/context.cxx index 463ce69..ab2019f 100644 --- a/cli/context.cxx +++ b/cli/context.cxx @@ -636,6 +636,378 @@ format_line (output_type ot, string& r, const char* s, size_t n) struct block { + enum kind_type {h, ul, ol, dl, li, text, pre}; + + kind_type kind; + bool para; // True if first text fragment should be in own paragraph. + string header; // Term in dl's li. + string value; + + block (kind_type k, bool p, const string& h = "") + : kind (k), para (p), header (h) {} +}; + +static const char* block_kind_str[] = { + "\\h", "\\ul", "\\ol", "\\dl", "\\li", "text", "preformatted text"}; + +inline ostream& +operator<< (ostream& os, block::kind_type k) +{ + return os << block_kind_str[k]; +} + +string context:: +format (output_type ot, string const& s, bool para) +{ + stack blocks; + blocks.push (block (block::text, para)); // Top-level. + + bool last (false); + for (size_t b (0), e; !last; b = e + 1) + { + bool pre (s[b] == 0x02); + + const char* l; + size_t n; + + if (pre) + { + ++b; // Skip 0x02. + + e = s.find (0x03, b); + assert (e != string::npos); + + l = s.c_str () + b; + n = e - b; + + ++e; // Skip newline that follows 0x03. + last = (e == s.size ()); + } + else + { + e = s.find ('\n', b); + last = (e == string::npos); + + l = s.c_str () + b; + n = (last ? s.size () : e) - b; + } + + const char* ol (l); // Original, full line for diagnostics. + size_t on (n); + + // First determine what kind of paragraph block this is. + // + block::kind_type k; + size_t pop (0); // Pop count. + + if (pre) + { + k = block::pre; + } + else + { + if (n >= 3 && strncmp (l, "\\h|", 3) == 0) + { + k = block::h; + l += 3; + n -= 3; + } + else if (n >= 4 && + (strncmp (l, "\\ul|", 4) == 0 || + strncmp (l, "\\ol|", 4) == 0 || + strncmp (l, "\\dl|", 4) == 0)) + { + switch (l[1]) + { + case 'u': k = block::ul; break; + case 'o': k = block::ol; break; + case 'd': k = block::dl; break; + } + + l += 4; + n -= 4; + } + else if (n >= 4 && strncmp (l, "\\li|", 4) == 0) + { + k = block::li; + l += 4; + n -= 4; + } + else + k = block::text; + + // Skip leading spaces after opening '|'. + // + if (k != block::text) + while (n != 0 && (*l == 0x20 || *l == 0x0D || *l == 0x09)) {l++; n--;} + + // Next figure out how many blocks we need to pop at the end of this + // paragraph. Things get a bit complicated since '|' could be escaped. + // + for (; n - pop > 0 && l[n - pop - 1] == '|'; ++pop) ; + if (pop != 0) + { + // To determine whether the first '|' is part of an escape sequence + // we have to find the first non-backslash character and then figure + // out who escapes whom. + // + size_t ec (0); // Escape count. + for (; n - pop - ec > 0 && l[n - pop - ec - 1] == '\\'; ++ec) ; + + // If we have an odd number of backslashes, then the last '|' is + // escaped. + // + if (ec % 2 != 0) + --pop; + + n -= pop; // Number of special '|' at the end. + + // Skip trailing spaces before closing '|'. + // + while (n != 0 && (l[n - 1] == 0x20 || + l[n - 1] == 0x0D || + l[n - 1] == 0x09)) n--; + } + } + + // Outer block kind. + // + block::kind_type ok (blocks.top ().kind); + + // Verify that this block type is valid in this context. Ignore + // empty text blocks (can happen if we just have '|'). + // + if (k != block::text || n != 0) + { + bool good (true); + + switch (ok) + { + case block::h: good = false; break; + case block::ul: + case block::ol: + case block::dl: good = (k == block::li); break; + case block::li: good = (k == block::text || k == block::pre); break; + case block::text: good = (k != block::li); break; + case block::pre: assert (false); + } + + if (!good) + { + cerr << "error: " << k << " inside " << ok << " " + << "in documentation string '" << s << "'" << endl; + throw generation_failed (); + } + } + + // Verify the block itself. + // + switch (k) + { + case block::h: + { + // \h blocks are only valid if we are required to start a new + // paragraph (first_para is true). + // + if (!para) + { + cerr << "error: paragraph '" << string (ol, 0, on) << "' " + << "not allowed in '" << s << "'" << endl; + throw generation_failed (); + } + + // \h must be single-paragraph. + // + if (pop == 0) + { + cerr << "error: '|' expected at the end of paragraph '" + << string (ol, 0, on) << "'" << endl; + throw generation_failed (); + } + + // \h must not be empty. + // + if (n == 0) + { + cerr << "error: empty paragraph '" << string (ol, 0, on) << "' " + << "in documentation string '" << s << "'" << endl; + throw generation_failed (); + } + + break; + } + case block::ul: + case block::ol: + case block::dl: + { + if (pop != 0) + { + cerr << "error: empty list '" << string (ol, 0, on) << "' " + << "in documentation string '" << s << "'" << endl; + throw generation_failed (); + } + + if (n != 0) + { + cerr << "error: unexpected text after " << k << "| " + << "in paragraph '" << string (ol, 0, on) << "'" << endl; + throw generation_failed (); + } + + break; + } + case block::li: + { + if (ok == block::dl) + { + if (n == 0) + { + cerr << "error: term text missing in paragraph '" + << string (ol, 0, on) << "'" << endl; + throw generation_failed (); + } + } + + break; + } + case block::text: + case block::pre: + break; + } + + // Push the block into the stack. + // + switch (k) + { + case block::h: blocks.push (block (k, false)); break; + case block::ul: + case block::ol: + case block::dl: blocks.push (block (k, true)); break; + case block::li: + { + string h; + if (blocks.top ().kind == block::dl) + { + format_line (ot, h, l, n); + n = 0; + } + + blocks.push (block (k, false, h)); + break; + } + case block::text: break; // No push. + case block::pre: break; // No push. + } + + // Output paragraph text. + // + if (n != 0) + { + block& b (blocks.top ()); + string& v (b.value); + bool first (v.empty ()); + + // Separate paragraphs with a blank line. + // + if (!first) + v += "\n\n"; + + if (k == block::pre) + { + if (ot == ot_html) + v += "
";
+
+        v.append (l, n);
+
+        if (ot == ot_html)
+          v += "
"; + } + else + { + if (!first || b.para) + { + if (ot == ot_html) + v += "

"; + } + + format_line (ot, v, l, n); + + if (!first || b.para) + { + if (ot == ot_html) + v += "

"; + } + } + } + + // Pop paragraph blocks. + // + if (pop >= blocks.size ()) // >= to account for top-level. + { + cerr << "error: extraneous '|' at the end of paragraph '" + << string (ol, 0, on) << "'" << endl; + throw generation_failed (); + } + + for (; pop != 0; --pop) + { + block pb (blocks.top ()); // move + string& pv (pb.value); + string& ph (pb.header); + + blocks.pop (); + + block& b (blocks.top ()); + string& v (b.value); + + if (ot == ot_html) + { + // Separate paragraphs with a blank line. + // + if (!v.empty ()) + v += "\n\n"; + + switch (pb.kind) + { + case block::h: v += "

" + pv + "

"; break; + case block::ul: v += ""; break; + case block::ol: v += "
    \n" + pv + "\n
"; break; + case block::dl: v += "
\n" + pv + "\n
"; break; + case block::li: + { + if (b.kind == block::dl) + { + v += "
" + ph + "
\n"; + v += "
" + pv + "
"; + } + else + v += "
  • " + pv + "
  • "; + + break; + } + case block::text: + case block::pre: assert (false); + } + } + } + } + + assert (!blocks.empty ()); // Should have top-level. + + if (blocks.size () > 1) + { + cerr << "error: unterminated paragraph " << blocks.top ().kind << " " + << "in documentation string '" << s << "'" << endl; + throw generation_failed (); + } + + return blocks.top ().value; +} + +/* + +struct block +{ enum value {h, ul, ol, dl, li, text} v_; block (value v = text): v_ (v) {} operator value () const {return v_;} @@ -655,8 +1027,7 @@ format (output_type ot, string const& s, bool first_para) string r; stack blocks; - // Flag that indicates whether the next fragment of text should start - // in its own paragraph. + // Flag that indicates whether this is the first paragraph. // bool para (first_para); @@ -776,13 +1147,16 @@ format (output_type ot, string const& s, bool first_para) throw generation_failed (); } + // Outer block or 'text' if top level. + // + block ob (blocks.empty () ? block (block::text) : blocks.top ()); + // Verify that this block type is valid in this context. Skip // empty text blocks (can happen if we just have '|'). // if (b != block::text || n != 0) { bool good (true); - block ob (blocks.empty () ? block (block::text) : blocks.top ()); switch (ob) { @@ -858,7 +1232,7 @@ format (output_type ot, string const& s, bool first_para) break; case block::li: - if (blocks.top () == block::dl) + if (ob == block::dl) { if (n == 0) { @@ -873,24 +1247,47 @@ format (output_type ot, string const& s, bool first_para) break; } + // Push the paragraph block. + // + if (b != block::text) + blocks.push (b); + // Output opening markup. // - if (ot == ot_html) + switch (ot) { + case ot_plain: + switch (b) + { + case block::li: + switch (ob) + { + case block::ul: r += "* "; break; + case block::ol: + case block::dl: + default: break; + } + case block::h: + case block::ul: + case block::ol: + case block::dl: + case block::text: + break; + } + break; + case ot_html: switch (b) { case block::h: r += "

    "; break; case block::ul: r += "
      "; break; case block::ol: r += "
        "; break; case block::dl: r += "
        "; break; - case block::li: - r += (blocks.top () == block::dl ? "
        " : "
      1. "); - break; - case block::text: - if (n != 0 && para) - r += "

        "; - break; + case block::li: r += (ob == block::dl ? "

        " : "
      2. "); break; + case block::text: if (n != 0 && para) r += "

        "; break; } + break; + case ot_man: + break; // @@ TODO } // Output paragraph text. @@ -898,38 +1295,32 @@ format (output_type ot, string const& s, bool first_para) if (n != 0) format_line (ot, r, l, n); - // Output intermediate markup, if any. + // Set the para flag and output intermediate markup, if any. // - if (ot == ot_html) + switch (ot) { + case ot_plain: + break; + case ot_html: switch (b) { case block::li: - if (blocks.top () == block::dl) - r += "

      3. \n
        "; + if (ob == block::dl) r += "
      4. \n
        "; + para = (ob != block::dl && n != 0); break; case block::text: - if (n != 0 && para) - r += "

        "; + if (n != 0 && para) r += "

        "; + para = para || (n != 0); + break; + default: + para = true; break; - default: break; } + break; + case ot_man: + break; // @@ TODO } - // Set the para flag. - // - switch (b) - { - case block::li: para = (blocks.top () != block::dl); break; - case block::text: para = para || (n != 0); break; - default: para = true; break; - } - - // Push the paragraph block. - // - if (b != block::text) - blocks.push (b); - // Pop paragraph blocks. // for (; pc != 0; --pc) @@ -937,6 +1328,8 @@ format (output_type ot, string const& s, bool first_para) b = blocks.top (); blocks.pop (); + ob = blocks.empty () ? block (block::text) : blocks.top (); + if (ot == ot_html) { switch (b) @@ -945,24 +1338,21 @@ format (output_type ot, string const& s, bool first_para) case block::ul: r += "
    "; break; case block::ol: r += ""; break; case block::dl: r += ""; break; - case block::li: r += blocks.top () == block::dl ? "" : ""; - break; + case block::li: r += (ob == block::dl ? "" : ""); break; case block::text: break; } if (pc != 1) // Add empty line unless this is the last separator. r += "\n\n"; - } - para = true; // End of a block always means new paragraph. + para = true; // End of a block always means new paragraph. + } } } if (last) break; - // Separate paragraphs with newline. - // if (para) r += "\n\n"; } @@ -976,6 +1366,7 @@ format (output_type ot, string const& s, bool first_para) return r; } +*/ string context:: fq_name (semantics::nameable& n, bool cxx_name) diff --git a/cli/html.cxx b/cli/html.cxx index 1f4e229..e91fdd7 100644 --- a/cli/html.cxx +++ b/cli/html.cxx @@ -241,8 +241,7 @@ namespace if (!c.names_empty ()) { - os << "
    " << endl - << endl; + os << "
    " << endl; names (c, names_option_); os << "
    " << endl << endl; -- cgit v1.1