From 381f872a76f6ad8fc5caee172e903f08dd652547 Mon Sep 17 00:00:00 2001
From: Boris Kolpackov <boris@codesynthesis.com>
Date: Thu, 29 Oct 2015 12:40:46 +0200
Subject: Add parsing support for scope-level documentation

---
 cli/buildfile              |   2 +-
 cli/html.cxx               |   2 +-
 cli/makefile               |   1 +
 cli/man.cxx                |   2 +-
 cli/parser.cxx             | 279 ++++++++++++++++++++++++++++++---------------
 cli/parser.hxx             |  11 +-
 cli/semantics.hxx          |   1 +
 cli/semantics/doc.cxx      |  28 +++++
 cli/semantics/doc.hxx      |  23 ++++
 cli/semantics/elements.hxx |   3 +
 cli/semantics/option.hxx   |  11 +-
 cli/source.cxx             |   4 +-
 cli/traversal.hxx          |   1 +
 cli/traversal/doc.hxx      |  17 +++
 14 files changed, 280 insertions(+), 105 deletions(-)
 create mode 100644 cli/semantics/doc.cxx
 create mode 100644 cli/semantics/doc.hxx
 create mode 100644 cli/traversal/doc.hxx

(limited to 'cli')
diff --git a/cli/buildfile b/cli/buildfile
index ebb8447..88b31f3 100644
--- a/cli/buildfile
+++ b/cli/buildfile
@@ -1,6 +1,6 @@
 import libs = libcutl%lib{cutl}
 
-sema = semantics/{class elements expression namespace option unit}
+sema = semantics/{class doc elements expression namespace option unit}
 trav = traversal/{class elements namespace option unit}
 
 exe{cli}: cxx{cli generator html lexer name-processor parser runtime-inline \
diff --git a/cli/html.cxx b/cli/html.cxx
index 1b60827..1679fd3 100644
--- a/cli/html.cxx
+++ b/cli/html.cxx
@@ -21,7 +21,7 @@ namespace
     {
       using semantics::names;
 
-      type::doc_list const& doc (o.doc ());
+      semantics::doc_strings const& doc (o.doc ());
 
       if (options.suppress_undocumented () && doc.empty ())
         return;
diff --git a/cli/makefile b/cli/makefile
index c06f8d4..78e09b1 100644
--- a/cli/makefile
+++ b/cli/makefile
@@ -23,6 +23,7 @@ name-processor.cxx
 
 cxx_tun +=               \
 semantics/class.cxx      \
+semantics/doc.cxx        \
 semantics/elements.cxx   \
 semantics/expression.cxx \
 semantics/namespace.cxx  \
diff --git a/cli/man.cxx b/cli/man.cxx
index 218cd57..b6fd7e8 100644
--- a/cli/man.cxx
+++ b/cli/man.cxx
@@ -21,7 +21,7 @@ namespace
     {
       using semantics::names;
 
-      type::doc_list const& doc (o.doc ());
+      semantics::doc_strings const& doc (o.doc ());
 
       if (options.suppress_undocumented () && doc.empty ())
         return;
diff --git a/cli/parser.cxx b/cli/parser.cxx
index d016f19..faca765 100644
--- a/cli/parser.cxx
+++ b/cli/parser.cxx
@@ -8,6 +8,7 @@
 #include <sys/stat.h>  // stat
 
 #include <fstream>
+#include <sstream>
 #include <iostream>
 
 #include "token.hxx"
@@ -174,6 +175,8 @@ parse (std::istream& is, path const& p)
   lexer l (is, p.string ());
   lexer_ = &l;
 
+  doc_count_ = 0;
+
   path_ = &p;
   valid_ = true;
 
@@ -221,7 +224,8 @@ def_unit ()
       }
 
       cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: "
-           << "expected namespace or class declaration instead of " << t
+           << "expected namespace, class, or documentation declaration "
+           << "instead of " << t
            << endl;
       throw error ();
     }
@@ -372,28 +376,114 @@ include_decl ()
 bool parser::
 decl (token& t)
 {
-  if (t.type () == token::t_keyword)
+  switch (t.type ())
   {
-    switch (t.keyword ())
+  case token::t_keyword:
     {
-    case token::k_namespace:
-      {
-        namespace_def ();
-        return true;
-      }
-    case token::k_class:
+      switch (t.keyword ())
       {
-        class_def ();
-        return true;
+      case token::k_namespace:
+        {
+          namespace_def ();
+          return true;
+        }
+      case token::k_class:
+        {
+          class_def ();
+          return true;
+        }
+      default:
+        break;
       }
-    default:
+
       break;
     }
+  case token::t_punctuation:
+    {
+      if (t.punctuation () != token::p_lcbrace)
+        break;
+
+      // '{' opens a doc-string sequence: fall through to scope_doc ().
+    }
+  case token::t_string_lit:
+    {
+      scope_doc (t);
+      return true;
+    }
+  default:
+    break;
   }
+
   return false;
 }
 
 void parser::
+scope_doc (token& t)
+{
+  // Parse scope-level documentation: either a single string literal
+  // or a brace-enclosed, comma-separated sequence of them. On entry t
+  // is the first token; on return it is the last token consumed.
+  //
+  size_t l (t.line ()), c (t.column ());
+
+  doc* d (0);
+
+  if (valid_)
+  {
+    // Use a counter to give scope-level docs unique names. We use a
+    // single counter throughout all units/scopes because we could be
+    // reopening namespaces.
+    //
+    ostringstream os;
+    os << "doc " << doc_count_++;
+
+    d = &root_->new_node<doc> (*path_, l, c);
+    root_->new_edge<names> (*scope_, *d, os.str ());
+  }
+
+  if (t.type () == token::t_string_lit)
+  {
+    // string-literal
+    //
+    if (valid_)
+      d->push_back (doc_string (t));
+  }
+  else
+  {
+    // doc-string-seq
+    //
+    assert (t.punctuation () == token::p_lcbrace);
+
+    for (t = lexer_->next ();; t = lexer_->next ())
+    {
+      if (t.type () != token::t_string_lit)
+      {
+        cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: "
+             << "expected documentation string instead of " << t << endl;
+        throw error ();
+      }
+
+      if (valid_)
+        d->push_back (doc_string (t));
+
+      // A comma continues the sequence; anything else must be '}'.
+      //
+      t = lexer_->next ();
+
+      if (t.punctuation () != token::p_comma)
+        break;
+    }
+
+    if (t.punctuation () != token::p_rcbrace)
+    {
+      cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: "
+           << "expected '}' instead of " << t << endl;
+      throw error ();
+    }
+  }
+}
+
+void parser::
 namespace_def ()
 {
   token t (lexer_->next ());
@@ -434,7 +524,7 @@ namespace_def ()
   if (t.punctuation () != token::p_rcbrace)
   {
     cerr << *path_ << ':' << t.line () << ':' << t.column () << ": error: "
-         << "expected namespace declaration, class declaration, or '}' "
+         << "expected namespace, class, or documentation declaration or '}' "
          << "instead of " << t << endl;
     throw error ();
   }
@@ -760,85 +850,7 @@ option_def (token& t)
       }
 
       if (valid_)
-      {
-        // Get rid of '"'.
-        //
-        string t1, t2;
-        string const& l (t.literal ());
-        char p ('\0');
-
-        for (size_t i (0), n (l.size ()); i < n; ++i)
-        {
-          if (l[i] == '"' && p != '\\')
-            continue;
-
-          // We need to keep track of \\ escapings so we don't confuse
-          // them with \", as in "\\".
-          //
-          if (l[i] == '\\' && p == '\\')
-            p = '\0';
-          else
-            p = l[i];
-
-          t1 += l[i];
-        }
-
-        // Get rid of leading and trailing spaces in each line.
-        //
-        if (t1.size () != 0)
-        {
-          bool more (true);
-          size_t b (0), e, p;
-
-          while (more)
-          {
-            p = e = t1.find ('\n', b);
-
-            if (p == string::npos)
-            {
-              e = t1.size ();
-              more = false;
-            }
-
-            while (b < e && (t1[b] == 0x20 || t1[b] == 0x0D || t1[b] == 0x09))
-              ++b;
-
-            --e;
-
-            while (e > b && (t1[e] == 0x20 || t1[e] == 0x0D || t1[e] == 0x09))
-              --e;
-
-            if (b <= e)
-              t2.append (t1, b, e - b + 1);
-
-            if (more)
-            {
-              t2 += '\n';
-              b = p + 1;
-            }
-          }
-        }
-
-        // Replace every single newlines with single space and all
-        // multiple new lines (paragraph marker) with a single newline.
-        //
-        t1.clear ();
-        for (size_t i (0), n (t2.size ()); i < n; ++i)
-        {
-          if (t2[i] == '\n')
-          {
-            size_t j (i);
-            for (; i + 1 < n && t2[i + 1] == '\n'; ++i) ;
-
-            if (j != 0 && i + 1 != n) // Strip leading and trailing newlines.
-              t1 += i != j ? '\n' : ' ';
-          }
-          else
-            t1 += t2[i];
-        }
-
-        o->doc ().push_back (t1);
-      }
+        o->doc ().push_back (doc_string (t));
 
       t = lexer_->next ();
 
@@ -866,6 +878,89 @@ option_def (token& t)
   return true;
 }
 
+string parser::
+doc_string (token& t)
+{
+  // Return the literal of t as documentation text. First get rid of '"'.
+  //
+  string t1, t2;
+  string const& l (t.literal ());
+  char p ('\0');
+
+  for (size_t i (0), n (l.size ()); i < n; ++i)
+  {
+    if (l[i] == '"' && p != '\\')
+      continue;
+
+    // We need to keep track of \\ escapings so we don't confuse
+    // them with \", as in "\\".
+    //
+    if (l[i] == '\\' && p == '\\')
+      p = '\0';
+    else
+      p = l[i];
+
+    t1 += l[i];
+  }
+
+  // Get rid of leading and trailing spaces in each line.
+  //
+  if (t1.size () != 0)
+  {
+    bool more (true);
+    size_t b (0), e, p;
+
+    while (more)
+    {
+      p = e = t1.find ('\n', b);
+
+      if (p == string::npos)
+      {
+        e = t1.size ();
+        more = false;
+      }
+
+      while (b < e && (t1[b] == 0x20 || t1[b] == 0x0D || t1[b] == 0x09))
+        ++b;
+
+      // e stays one past the end: --e would wrap for an empty first line.
+      while (e > b &&
+             (t1[e - 1] == 0x20 || t1[e - 1] == 0x0D || t1[e - 1] == 0x09))
+        --e;
+
+      if (b < e)
+        t2.append (t1, b, e - b);
+
+      if (more)
+      {
+        t2 += '\n';
+        b = p + 1;
+      }
+    }
+  }
+
+  // Replace every single newline with a single space and all
+  // multiple newlines (paragraph marker) with a single newline.
+  //
+  t1.clear ();
+  for (size_t i (0), n (t2.size ()); i < n; ++i)
+  {
+    if (t2[i] == '\n')
+    {
+      size_t j (i);
+      for (; i + 1 < n && t2[i + 1] == '\n'; ++i) ;
+
+      if (j != 0 && i + 1 != n) // Strip leading and trailing newlines.
+        t1 += i != j ? '\n' : ' ';
+    }
+    else
+      t1 += t2[i];
+  }
+
+  return t1;
+}
+
+
 bool parser::
 qualified_name (token& t, string& r)
 {
diff --git a/cli/parser.hxx b/cli/parser.hxx
index 29f4b22..e3b1fee 100644
--- a/cli/parser.hxx
+++ b/cli/parser.hxx
@@ -9,7 +9,8 @@
 #include <map>
 #include <string>
 #include <vector>
-#include <memory> // std::auto_ptr
+#include <memory>  // auto_ptr
+#include <cstddef> // size_t
 #include <istream>
 
 #include "semantics/elements.hxx"
@@ -43,6 +44,9 @@ private:
   decl (token&);
 
   void
+  scope_doc (token&);
+
+  void
   namespace_def ();
 
   void
@@ -51,6 +55,9 @@ private:
   bool
   option_def (token&);
 
+  std::string
+  doc_string (token&);
+
   bool
   qualified_name (token&, std::string& name);
 
@@ -73,6 +80,8 @@ private:
   semantics::cli_unit* cur_;
   semantics::scope* scope_;
 
+  std::size_t doc_count_; // Scope doc counter, see scope_doc() for details.
+
   typedef std::map<semantics::path, semantics::cli_unit*> include_map;
   include_map include_map_;
 };
diff --git a/cli/semantics.hxx b/cli/semantics.hxx
index b6b824f..3f9d61b 100644
--- a/cli/semantics.hxx
+++ b/cli/semantics.hxx
@@ -7,6 +7,7 @@
 #define CLI_SEMANTICS_HXX
 
 #include <semantics/class.hxx>
+#include <semantics/doc.hxx>
 #include <semantics/elements.hxx>
 #include <semantics/expression.hxx>
 #include <semantics/namespace.hxx>
diff --git a/cli/semantics/doc.cxx b/cli/semantics/doc.cxx
new file mode 100644
index 0000000..4b2f9b4
--- /dev/null
+++ b/cli/semantics/doc.cxx
@@ -0,0 +1,28 @@
+// file      : cli/semantics/doc.cxx
+// author    : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2009-2011 Code Synthesis Tools CC
+// license   : MIT; see accompanying LICENSE file
+
+#include <cutl/compiler/type-info.hxx>
+
+#include <semantics/doc.hxx>
+
+namespace semantics
+{
+  // Type info: describe doc (base: nameable) and pass it to insert ().
+  //
+  namespace
+  {
+    struct init
+    {
+      init ()
+      {
+        using compiler::type_info;
+
+        type_info ti (typeid (doc));
+        ti.add_base (typeid (nameable));
+        insert (ti);
+      }
+    } init_; // File-local instance; its constructor runs the code above.
+  }
+}
diff --git a/cli/semantics/doc.hxx b/cli/semantics/doc.hxx
new file mode 100644
index 0000000..4fca4e8
--- /dev/null
+++ b/cli/semantics/doc.hxx
@@ -0,0 +1,23 @@
+// file      : cli/semantics/doc.hxx
+// author    : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2009-2011 Code Synthesis Tools CC
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef CLI_SEMANTICS_DOC_HXX
+#define CLI_SEMANTICS_DOC_HXX
+
+#include <semantics/elements.hxx>
+
+namespace semantics
+{
+  // Scope-level documentation node: a nameable that is also a doc_strings.
+  //
+  class doc: public nameable, public doc_strings
+  {
+  public:
+    doc (path const& file, size_t line, size_t column)
+        : node (file, line, column) {}
+  };
+}
+
+#endif // CLI_SEMANTICS_DOC_HXX
diff --git a/cli/semantics/elements.hxx b/cli/semantics/elements.hxx
index e650f37..a0adce0 100644
--- a/cli/semantics/elements.hxx
+++ b/cli/semantics/elements.hxx
@@ -40,6 +40,9 @@ namespace semantics
   using fs::path;
   using fs::invalid_path;
 
+  //
+  //
+  typedef std::vector<string> doc_strings;
 
   //
   //
diff --git a/cli/semantics/option.hxx b/cli/semantics/option.hxx
index 20e40c5..90a462a 100644
--- a/cli/semantics/option.hxx
+++ b/cli/semantics/option.hxx
@@ -6,8 +6,6 @@
 #ifndef CLI_SEMANTICS_OPTION_HXX
 #define CLI_SEMANTICS_OPTION_HXX
 
-#include <vector>
-
 #include <semantics/elements.hxx>
 
 namespace semantics
@@ -138,8 +136,7 @@ namespace semantics
     }
 
   public:
-    typedef std::vector<string> doc_list;
-    typedef doc_list::const_iterator doc_iterator;
+    typedef doc_strings::const_iterator doc_iterator;
 
     doc_iterator
     doc_begin () const
@@ -153,13 +150,13 @@ namespace semantics
       return doc_.end ();
     }
 
-    doc_list const&
+    doc_strings const&
     doc () const
     {
       return doc_;
     }
 
-    doc_list&
+    doc_strings&
     doc ()
     {
       return doc_;
@@ -186,7 +183,7 @@ namespace semantics
   private:
     belongs_type* belongs_;
     initialized_type* initialized_;
-    doc_list doc_;
+    doc_strings doc_;
   };
 }
 
diff --git a/cli/source.cxx b/cli/source.cxx
index 1e0be15..d5c3fd3 100644
--- a/cli/source.cxx
+++ b/cli/source.cxx
@@ -185,7 +185,7 @@ namespace
     {
       using semantics::names;
 
-      type::doc_list const& doc (o.doc ());
+      semantics::doc_strings const& doc (o.doc ());
 
       if (options.suppress_undocumented () && doc.empty ())
         return;
@@ -237,7 +237,7 @@ namespace
     {
       using semantics::names;
 
-      type::doc_list const& doc (o.doc ());
+      semantics::doc_strings const& doc (o.doc ());
 
       if (options.suppress_undocumented () && doc.empty ())
         return;
diff --git a/cli/traversal.hxx b/cli/traversal.hxx
index c50a698..32febef 100644
--- a/cli/traversal.hxx
+++ b/cli/traversal.hxx
@@ -7,6 +7,7 @@
 #define CLI_TRAVERSAL_HXX
 
 #include <traversal/class.hxx>
+#include <traversal/doc.hxx>
 #include <traversal/elements.hxx>
 #include <traversal/expression.hxx>
 #include <traversal/namespace.hxx>
diff --git a/cli/traversal/doc.hxx b/cli/traversal/doc.hxx
new file mode 100644
index 0000000..94c8c3e
--- /dev/null
+++ b/cli/traversal/doc.hxx
@@ -0,0 +1,17 @@
+// file      : cli/traversal/doc.hxx
+// author    : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2009-2011 Code Synthesis Tools CC
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef CLI_TRAVERSAL_DOC_HXX
+#define CLI_TRAVERSAL_DOC_HXX
+
+#include <traversal/elements.hxx>
+#include <semantics/doc.hxx>
+
+namespace traversal
+{
+  struct doc: node<semantics::doc> {}; // Traversal type for semantics::doc.
+}
+
+#endif // CLI_TRAVERSAL_DOC_HXX
-- 
cgit v1.1