From 83996ed8d7178750a0b96125b209cc97ce5ddba0 Mon Sep 17 00:00:00 2001
From: Boris Kolpackov <boris@codesynthesis.com>
Date: Thu, 12 Nov 2015 11:12:52 +0200
Subject: New formatter implementation

---
 cli/context.cxx | 469 +++++++++++++++++++++++++++++++++++++++++++++++++++-----
 cli/html.cxx    |   3 +-
 2 files changed, 431 insertions(+), 41 deletions(-)

(limited to 'cli')
diff --git a/cli/context.cxx b/cli/context.cxx
index 463ce69..ab2019f 100644
--- a/cli/context.cxx
+++ b/cli/context.cxx
@@ -636,6 +636,378 @@ format_line (output_type ot, string& r, const char* s, size_t n)
 
 struct block
 {
+  enum kind_type {h, ul, ol, dl, li, text, pre};
+
+  kind_type kind;
+  bool para;     // True if first text fragment should be in own paragraph.
+  string header; // Term in dl's li.
+  string value;
+
+  block (kind_type k, bool p, const string& h = "")
+      : kind (k), para (p), header (h) {}
+};
+
+static const char* block_kind_str[] = {
+  "\\h", "\\ul", "\\ol", "\\dl", "\\li", "text", "preformatted text"};
+
+inline ostream&
+operator<< (ostream& os, block::kind_type k)
+{
+  return os << block_kind_str[k];
+}
+
+string context::
+format (output_type ot, string const& s, bool para)
+{
+  stack<block> blocks;
+  blocks.push (block (block::text, para)); // Top-level.
+
+  bool last (false);
+  for (size_t b (0), e; !last; b = e + 1)
+  {
+    bool pre (s[b] == 0x02);
+
+    const char* l;
+    size_t n;
+
+    if (pre)
+    {
+      ++b; // Skip 0x02.
+
+      e = s.find (0x03, b);
+      assert (e != string::npos);
+
+      l = s.c_str () + b;
+      n = e - b;
+
+      ++e; // Skip newline that follows 0x03.
+      last = (e == s.size ());
+    }
+    else
+    {
+      e = s.find ('\n', b);
+      last = (e == string::npos);
+
+      l = s.c_str () + b;
+      n = (last ? s.size () : e) - b;
+    }
+
+    const char* ol (l); // Original, full line for diagnostics.
+    size_t on (n);
+
+    // First determine what kind of paragraph block this is.
+    //
+    block::kind_type k;
+    size_t pop (0); // Pop count.
+
+    if (pre)
+    {
+      k = block::pre;
+    }
+    else
+    {
+      if (n >= 3 && strncmp (l, "\\h|", 3) == 0)
+      {
+        k = block::h;
+        l += 3;
+        n -= 3;
+      }
+      else if (n >= 4 &&
+               (strncmp (l, "\\ul|", 4) == 0 ||
+                strncmp (l, "\\ol|", 4) == 0 ||
+                strncmp (l, "\\dl|", 4) == 0))
+      {
+        switch (l[1])
+        {
+        case 'u': k = block::ul; break;
+        case 'o': k = block::ol; break;
+        case 'd': k = block::dl; break;
+        }
+
+        l += 4;
+        n -= 4;
+      }
+      else if (n >= 4 && strncmp (l, "\\li|", 4) == 0)
+      {
+        k = block::li;
+        l += 4;
+        n -= 4;
+      }
+      else
+        k = block::text;
+
+      // Skip leading spaces after opening '|'.
+      //
+      if (k != block::text)
+        while (n != 0 && (*l == 0x20 || *l == 0x0D || *l == 0x09)) {l++; n--;}
+
+      // Next figure out how many blocks we need to pop at the end of this
+      // paragraph. Things get a bit complicated since '|' could be escaped.
+      //
+      for (; n - pop > 0 && l[n - pop - 1] == '|'; ++pop) ;
+      if (pop != 0)
+      {
+        // To determine whether the first '|' is part of an escape sequence
+        // we have to find the first non-backslash character and then figure
+        // out who escapes whom.
+        //
+        size_t ec (0); // Escape count.
+        for (; n - pop - ec > 0 && l[n - pop - ec - 1] == '\\'; ++ec) ;
+
+        // If we have an odd number of backslashes, then the first '|' of the
+        // trailing run is escaped (and therefore is not a closing '|').
+        //
+        if (ec % 2 != 0)
+          --pop;
+
+        n -= pop; // Number of special '|' at the end.
+
+        // Skip trailing spaces before closing '|'.
+        //
+        while (n != 0 && (l[n - 1] == 0x20 ||
+                          l[n - 1] == 0x0D ||
+                          l[n - 1] == 0x09)) n--;
+      }
+    }
+
+    // Outer block kind.
+    //
+    block::kind_type ok (blocks.top ().kind);
+
+    // Verify that this block type is valid in this context. Ignore
+    // empty text blocks (can happen if we just have '|').
+    //
+    if (k != block::text || n != 0)
+    {
+      bool good (true);
+
+      switch (ok)
+      {
+      case block::h: good = false; break;
+      case block::ul:
+      case block::ol:
+      case block::dl: good = (k == block::li); break;
+      case block::li: good = (k == block::text || k == block::pre); break;
+      case block::text: good = (k != block::li); break;
+      case block::pre: assert (false);
+      }
+
+      if (!good)
+      {
+        cerr << "error: " << k << " inside " << ok << " "
+             << "in documentation string '" << s << "'" << endl;
+        throw generation_failed ();
+      }
+    }
+
+    // Verify the block itself.
+    //
+    switch (k)
+    {
+    case block::h:
+      {
+        // \h blocks are only valid if we are required to start a new
+        // paragraph (para is true).
+        //
+        if (!para)
+        {
+          cerr << "error: paragraph '" << string (ol, 0, on) << "' "
+               << "not allowed in '" << s << "'" << endl;
+          throw generation_failed ();
+        }
+
+        // \h must be single-paragraph.
+        //
+        if (pop == 0)
+        {
+          cerr << "error: '|' expected at the end of paragraph '"
+               << string (ol, 0, on) << "'" << endl;
+          throw generation_failed ();
+        }
+
+        // \h must not be empty.
+        //
+        if (n == 0)
+        {
+          cerr << "error: empty paragraph '" << string (ol, 0, on) << "' "
+               << "in documentation string '" << s << "'" << endl;
+          throw generation_failed ();
+        }
+
+        break;
+      }
+    case block::ul:
+    case block::ol:
+    case block::dl:
+      {
+        if (pop != 0)
+        {
+          cerr << "error: empty list '" << string (ol, 0, on) << "' "
+               << "in documentation string '" << s << "'" << endl;
+          throw generation_failed ();
+        }
+
+        if (n != 0)
+        {
+          cerr << "error: unexpected text after " << k << "| "
+               << "in paragraph '" << string (ol, 0, on) << "'" << endl;
+          throw generation_failed ();
+        }
+
+        break;
+      }
+    case block::li:
+      {
+        if (ok == block::dl)
+        {
+          if (n == 0)
+          {
+            cerr << "error: term text missing in paragraph '"
+                 << string (ol, 0, on) << "'" << endl;
+            throw generation_failed ();
+          }
+        }
+
+        break;
+      }
+    case block::text:
+    case block::pre:
+      break;
+    }
+
+    // Push the block into the stack.
+    //
+    switch (k)
+    {
+    case block::h: blocks.push (block (k, false)); break;
+    case block::ul:
+    case block::ol:
+    case block::dl: blocks.push (block (k, true)); break;
+    case block::li:
+      {
+        string h;
+        if (blocks.top ().kind == block::dl)
+        {
+          format_line (ot, h, l, n);
+          n = 0;
+        }
+
+        blocks.push (block (k, false, h));
+        break;
+      }
+    case block::text: break; // No push.
+    case block::pre: break;  // No push.
+    }
+
+    // Output paragraph text.
+    //
+    if (n != 0)
+    {
+      block& b (blocks.top ());
+      string& v (b.value);
+      bool first (v.empty ());
+
+      // Separate paragraphs with a blank line.
+      //
+      if (!first)
+        v += "\n\n";
+
+      if (k == block::pre)
+      {
+        if (ot == ot_html)
+          v += "<pre>";
+
+        v.append (l, n);
+
+        if (ot == ot_html)
+          v += "</pre>";
+      }
+      else
+      {
+        if (!first || b.para)
+        {
+          if (ot == ot_html)
+            v += "<p>";
+        }
+
+        format_line (ot, v, l, n);
+
+        if (!first || b.para)
+        {
+          if (ot == ot_html)
+            v += "</p>";
+        }
+      }
+    }
+
+    // Pop paragraph blocks.
+    //
+    if (pop >= blocks.size ()) // >= to account for top-level.
+    {
+      cerr << "error: extraneous '|' at the end of paragraph '"
+           << string (ol, 0, on) << "'" << endl;
+      throw generation_failed ();
+    }
+
+    for (; pop != 0; --pop)
+    {
+      block pb (blocks.top ()); // move
+      string& pv (pb.value);
+      string& ph (pb.header);
+
+      blocks.pop ();
+
+      block& b (blocks.top ());
+      string& v (b.value);
+
+      if (ot == ot_html)
+      {
+        // Separate paragraphs with a blank line.
+        //
+        if (!v.empty ())
+          v += "\n\n";
+
+        switch (pb.kind)
+        {
+        case block::h:  v += "<h1>" + pv + "</h1>"; break;
+        case block::ul: v += "<ul>\n" + pv + "\n</ul>"; break;
+        case block::ol: v += "<ol>\n" + pv + "\n</ol>"; break;
+        case block::dl: v += "<dl>\n" + pv + "\n</dl>"; break;
+        case block::li:
+          {
+            if (b.kind == block::dl)
+            {
+              v += "<dt>" + ph + "</dt>\n";
+              v += "<dd>" + pv + "</dd>";
+            }
+            else
+              v += "<li>" + pv + "</li>";
+
+            break;
+          }
+        case block::text:
+        case block::pre: assert (false);
+        }
+      }
+    }
+  }
+
+  assert (!blocks.empty ()); // Should have top-level.
+
+  if (blocks.size () > 1)
+  {
+    cerr << "error: unterminated paragraph " << blocks.top ().kind << " "
+         << "in documentation string '" << s << "'" << endl;
+    throw generation_failed ();
+  }
+
+  return blocks.top ().value;
+}
+
+/*
+
+struct block
+{
   enum value {h, ul, ol, dl, li, text} v_;
   block (value v = text): v_ (v) {}
   operator value () const {return v_;}
@@ -655,8 +1027,7 @@ format (output_type ot, string const& s, bool first_para)
   string r;
   stack<block> blocks;
 
-  // Flag that indicates whether the next fragment of text should start
-  // in its own paragraph.
+  // Flag that indicates whether this is the first paragraph.
   //
   bool para (first_para);
 
@@ -776,13 +1147,16 @@ format (output_type ot, string const& s, bool first_para)
         throw generation_failed ();
       }
 
+      // Outer block or 'text' if top level.
+      //
+      block ob (blocks.empty () ? block (block::text) : blocks.top ());
+
       // Verify that this block type is valid in this context. Skip
       // empty text blocks (can happen if we just have '|').
       //
       if (b != block::text || n != 0)
       {
         bool good (true);
-        block ob (blocks.empty () ? block (block::text) : blocks.top ());
 
         switch (ob)
         {
@@ -858,7 +1232,7 @@ format (output_type ot, string const& s, bool first_para)
         break;
       case block::li:
 
-        if (blocks.top () == block::dl)
+        if (ob == block::dl)
         {
           if (n == 0)
           {
@@ -873,24 +1247,47 @@ format (output_type ot, string const& s, bool first_para)
         break;
       }
 
+      // Push the paragraph block.
+      //
+      if (b != block::text)
+        blocks.push (b);
+
       // Output opening markup.
       //
-      if (ot == ot_html)
+      switch (ot)
       {
+      case ot_plain:
+        switch (b)
+        {
+        case block::li:
+          switch (ob)
+          {
+          case block::ul: r += "* "; break;
+          case block::ol:
+          case block::dl:
+          default: break;
+          }
+        case block::h:
+        case block::ul:
+        case block::ol:
+        case block::dl:
+        case block::text:
+          break;
+        }
+        break;
+      case ot_html:
         switch (b)
         {
         case block::h:  r += "<h1>"; break;
         case block::ul: r += "<ul>"; break;
         case block::ol: r += "<ol>"; break;
         case block::dl: r += "<dl>"; break;
-        case block::li:
-          r += (blocks.top () == block::dl ? "<dt>" : "<li>");
-          break;
-        case block::text:
-          if (n != 0 && para)
-            r += "<p>";
-          break;
+        case block::li: r += (ob == block::dl ? "<dt>" : "<li>"); break;
+        case block::text: if (n != 0 && para) r += "<p>"; break;
         }
+        break;
+      case ot_man:
+        break; // @@ TODO
       }
 
       // Output paragraph text.
@@ -898,38 +1295,32 @@ format (output_type ot, string const& s, bool first_para)
       if (n != 0)
         format_line (ot, r, l, n);
 
-      // Output intermediate markup, if any.
+      // Set the para flag and output intermediate markup, if any.
       //
-      if (ot == ot_html)
+      switch (ot)
       {
+      case ot_plain:
+        break;
+      case ot_html:
         switch (b)
         {
         case block::li:
-          if (blocks.top () == block::dl)
-            r += "</dt>\n<dd>";
+          if (ob == block::dl) r += "</dt>\n<dd>";
+          para = (ob != block::dl && n != 0);
           break;
         case block::text:
-          if (n != 0 && para)
-            r += "</p>";
+          if (n != 0 && para) r += "</p>";
+          para = para || (n != 0);
+          break;
+        default:
+          para = true;
           break;
-        default: break;
         }
+        break;
+      case ot_man:
+        break; // @@ TODO
       }
 
-      // Set the para flag.
-      //
-      switch (b)
-      {
-      case block::li: para = (blocks.top () != block::dl); break;
-      case block::text: para = para || (n != 0); break;
-      default: para = true; break;
-      }
-
-      // Push the paragraph block.
-      //
-      if (b != block::text)
-        blocks.push (b);
-
       // Pop paragraph blocks.
       //
       for (; pc != 0; --pc)
@@ -937,6 +1328,8 @@ format (output_type ot, string const& s, bool first_para)
         b = blocks.top ();
         blocks.pop ();
 
+        ob = blocks.empty () ? block (block::text) : blocks.top ();
+
         if (ot == ot_html)
         {
           switch (b)
@@ -945,24 +1338,21 @@ format (output_type ot, string const& s, bool first_para)
           case block::ul: r += "</ul>"; break;
           case block::ol: r += "</ol>"; break;
           case block::dl: r += "</dl>"; break;
-          case block::li: r += blocks.top () == block::dl ? "</dd>" : "</li>";
-            break;
+          case block::li: r += (ob == block::dl ? "</dd>" : "</li>"); break;
           case block::text: break;
           }
 
           if (pc != 1) // Add empty line unless this is the last separator.
             r += "\n\n";
-        }
 
-        para = true; // End of a block always means new paragraph.
+          para = true; // End of a block always means new paragraph.
+        }
       }
     }
 
     if (last)
       break;
 
-    // Separate paragraphs with newline.
-    //
     if (para)
       r += "\n\n";
   }
@@ -976,6 +1366,7 @@ format (output_type ot, string const& s, bool first_para)
 
   return r;
 }
+*/
 
 string context::
 fq_name (semantics::nameable& n, bool cxx_name)
diff --git a/cli/html.cxx b/cli/html.cxx
index 1f4e229..e91fdd7 100644
--- a/cli/html.cxx
+++ b/cli/html.cxx
@@ -241,8 +241,7 @@ namespace
 
       if (!c.names_empty ())
       {
-        os << "  <dl class=\"options\">" << endl
-           << endl;
+        os << "  <dl class=\"options\">" << endl;
         names (c, names_option_);
         os << "  </dl>" << endl
            << endl;
-- 
cgit v1.1