From 2bc006daa59995085626b3e9c0754823bf9d95a5 Mon Sep 17 00:00:00 2001
From: Boris Kolpackov <boris@codesynthesis.com>
Date: Wed, 29 Jun 2011 14:08:29 +0200
Subject: Add --include-regex and --include-regex-trace options

This requires libcutl update.
---
 NEWS                   |   9 +++++
 doc/odb-epilogue.1     |  24 ++++++++++++
 doc/odb-epilogue.xhtml |  28 ++++++++++++++
 odb/context.cxx        |  58 ++++++++++++++++++++++++++++
 odb/context.hxx        | 102 +++++++++++++++++++++++++++++++------------------
 odb/generator.cxx      |  21 +++++-----
 odb/include.cxx        |   6 +--
 odb/options.cli        |  29 ++++++++++++++
 8 files changed, 225 insertions(+), 52 deletions(-)
diff --git a/NEWS b/NEWS
index dbfc341..8dde908 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,12 @@
+Version 1.5.0
+
+  * New option, --include-regex, allows the modification of the #include
+    directive paths generated by the ODB compiler. This is primarily useful
+    when placing the generated code into subdirectories and the #include
+    directives have to be adjusted accordingly. The --include-regex-trace
+    option is useful for debugging the expressions specified with
+    --include-regex.
+
 Version 1.4.0
 
   * New profile, qt, provides persistence support for the Qt framework. This
diff --git a/doc/odb-epilogue.1 b/doc/odb-epilogue.1
index c2dbd05..b202796 100644
--- a/doc/odb-epilogue.1
+++ b/doc/odb-epilogue.1
@@ -1,4 +1,28 @@
 .\"
+.\" REGEX AND SHELL QUOTING
+.\"
+.SH REGEX AND SHELL QUOTING
+When entering a regular expression argument in the shell command line
+it is often necessary to use quoting (enclosing the argument in " "
+or ' ') in order to prevent the shell from interpreting certain
+characters, for example, spaces as argument separators and $ as
+variable expansions.
+
+Unfortunately it is hard to achieve this in a manner that is portable
+across POSIX shells, such as those found on GNU/Linux and UNIX, and
+Windows shell. For example, if you use " " for quoting you will get
+a wrong result with POSIX shells if your expression contains $. The
+standard way of dealing with this on POSIX systems is to use ' '
+instead. Unfortunately, Windows shell does not remove ' ' from
+arguments when they are passed to applications. As a result you may
+have to use ' ' for POSIX and " " for Windows ($ is not treated as
+a special character on Windows).
+
+Alternatively, you can save regular expression options into a file,
+one option per line, and use this file with the
+.B --options-file
+option. With this approach you don't need to worry about shell quoting.
+.\"
 .\" DIAGNOSTICS
 .\"
 .SH DIAGNOSTICS
diff --git a/doc/odb-epilogue.xhtml b/doc/odb-epilogue.xhtml
index 4f67528..f489a15 100644
--- a/doc/odb-epilogue.xhtml
+++ b/doc/odb-epilogue.xhtml
@@ -1,3 +1,31 @@
+  <h1>REGEX AND SHELL QUOTING</h1>
+
+  <p>When entering a regular expression argument in the shell
+     command line it is often necessary to use quoting (enclosing
+     the argument in <code><b>"&nbsp;"</b></code> or
+     <code><b>'&nbsp;'</b></code>) in order to prevent the shell
+     from interpreting certain characters, for example, spaces as
+     argument separators and <code><b>$</b></code> as variable
+     expansions.</p>
+
+  <p>Unfortunately it is hard to achieve this in a manner that is
+     portable across POSIX shells, such as those found on
+     GNU/Linux and UNIX, and Windows shell. For example, if you
+     use <code><b>"&nbsp;"</b></code> for quoting you will get a
+     wrong result with POSIX shells if your expression contains
+     <code><b>$</b></code>. The standard way of dealing with this
+     on POSIX systems is to use <code><b>'&nbsp;'</b></code> instead.
+     Unfortunately, Windows shell does not remove <code><b>'&nbsp;'</b></code>
+     from arguments when they are passed to applications. As a result you
+     may have to use <code><b>'&nbsp;'</b></code> for POSIX and
+     <code><b>"&nbsp;"</b></code> for Windows (<code><b>$</b></code> is
+     not treated as a special character on Windows).</p>
+
+  <p>Alternatively, you can save regular expression options into
+     a file, one option per line, and use this file with the
+     <code><b>--options-file</b></code> option. With this approach
+     you don't need to worry about shell quoting.</p>
+
   <h1>DIAGNOSTICS</h1>
 
   <p>If the input file is not valid C++, <code><b>odb</b></code>
diff --git a/odb/context.cxx b/odb/context.cxx
index 00edd8d..04761f8 100644
--- a/odb/context.cxx
+++ b/odb/context.cxx
@@ -111,6 +111,7 @@ context (ostream& os_,
       options (ops),
       db (options.database ()),
       keyword_set (data_->keyword_set_),
+      include_regex (data_->include_regex_),
       embedded_schema (ops.generate_schema () &&
                        ops.schema_format ().count (schema_format::embedded)),
       top_object (data_->top_object_),
@@ -121,6 +122,10 @@ context (ostream& os_,
 
   for (size_t i (0); i < sizeof (keywords) / sizeof (char*); ++i)
     data_->keyword_set_.insert (keywords[i]);
+
+  for (strings::const_iterator i (ops.include_regex ().begin ());
+       i != ops.include_regex ().end (); ++i)
+    data_->include_regex_.push_back (regexsub (*i));
 }
 
 context::
@@ -131,6 +136,7 @@ context ()
     options (current ().options),
     db (current ().db),
     keyword_set (current ().keyword_set),
+    include_regex (current ().include_regex),
     embedded_schema (current ().embedded_schema),
     top_object (current ().top_object),
     object (current ().object)
@@ -726,6 +732,58 @@ has_a (semantics::type& t, unsigned short flags)
   return impl.result ();
 }
 
+string context::
+process_include_path (string const& ip, bool prefix, char open)
+{
+  bool t (options.include_regex_trace ()); // Trace regex application to cerr.
+  string p (prefix ? options.include_prefix () : string ());
+
+  if (!p.empty () && p[p.size () - 1] != '/') // Make the prefix end with /.
+    p.append ("/");
+
+  string path (p + ip), r; // r holds the result.
+
+  if (t)
+    cerr << "include: '" << path << "'" << endl;
+
+  bool found (false);
+
+  for (regex_mapping::const_iterator i (include_regex.begin ());
+       i != include_regex.end (); ++i)
+  {
+    if (t)
+      cerr << "try: '" << i->regex () << "' : ";
+
+    if (i->match (path))
+    {
+      r = i->replace (path);
+      found = true;
+
+      if (t)
+        cerr << "'" << r << "' : ";
+    }
+
+    if (t)
+      cerr << (found ? '+' : '-') << endl;
+
+    if (found) // The first expression that matches wins.
+      break;
+  }
+
+  if (!found) // No expression matched: use the path unchanged.
+    r = path;
+
+  // Add brackets or quotes unless the path already has them.
+  //
+  if (!r.empty () && r[0] != '"' && r[0] != '<')
+  {
+    bool b (open == '<' || (open == '\0' && options.include_with_brackets ()));
+    char op (b ? '<' : '"'), cl (b ? '>' : '"');
+    r = op + r + cl;
+  }
+
+  return r;
+}
 
 // namespace
 //
diff --git a/odb/context.hxx b/odb/context.hxx
index 84e9f70..54f6832 100644
--- a/odb/context.hxx
+++ b/odb/context.hxx
@@ -9,11 +9,13 @@
 #include <map>
 #include <set>
 #include <stack>
+#include <vector>
 #include <string>
 #include <ostream>
 #include <cstddef> // std::size_t
 #include <iostream>
 
+#include <cutl/re.hxx>
 #include <cutl/shared-ptr.hxx>
 
 #include <odb/options.hxx>
@@ -23,6 +25,15 @@
 using std::endl;
 using std::cerr;
 
+// Regex.
+//
+using cutl::re::regex;
+using cutl::re::regexsub;
+
+typedef std::vector<regexsub> regex_mapping;
+
+//
+//
 class generation_failed {};
 
 // Keep this enum synchronized with the one in libodb/odb/pointer-traits.hxx.
@@ -51,6 +62,7 @@ class context
 public:
   typedef std::size_t size_t;
   typedef std::string string;
+  typedef std::vector<string> strings;
   typedef std::ostream ostream;
 
   typedef ::options options_type;
@@ -316,6 +328,19 @@ public:
   bool
   has_a (semantics::type&, unsigned short flags);
 
+public:
+  // Process include path by adding the prefix, putting it through
+  // the include regex list, and adding opening and closing include
+  // characters ("" or <>) if necessary. The prefix argument indicates
+  // whether the include prefix specified with the --include-prefix
+  // option should be added. The open argument can be used to specify
+  // the opening character. It can have three values: ", <, or \0. In
+  // case of \0, the character is determined based on the value of the
+  // --include-with-brackets option.
+  //
+  string
+  process_include_path (string const&, bool prefix = true, char open = '\0');
+
   // Diverge output.
   //
 public:
@@ -350,8 +375,45 @@ private:
       return c.get<X> (key);
   }
 
+public:
+  typedef std::set<string> keyword_set_type;
+
+  struct db_type_type
+  {
+    db_type_type () {}
+    db_type_type (string const& t, string const& it)
+        : type (t), id_type (it)
+    {
+    }
+
+    string type;
+    string id_type;
+  };
+  typedef std::map<string, db_type_type> type_map_type;
+
 protected:
-  struct data;
+  struct data
+  {
+    virtual
+    ~data () {}
+    data (std::ostream& os)
+        : os_ (os.rdbuf ()), top_object_ (0), object_ (0)
+    {
+    }
+
+  public:
+    std::ostream os_; // Constructed over os's stream buffer.
+    std::stack<std::streambuf*> os_stack_;
+
+    semantics::class_* top_object_;
+    semantics::class_* object_;
+
+    keyword_set_type keyword_set_;
+    type_map_type type_map_;
+
+    regex_mapping include_regex_; // Built from the --include-regex values.
+  };
+
   typedef cutl::shared_ptr<data> data_ptr;
   data_ptr data_;
 
@@ -361,9 +423,10 @@ public:
   options_type const& options;
   database const db;
 
-  typedef std::set<string> keyword_set_type;
   keyword_set_type const& keyword_set;
 
+  regex_mapping const& include_regex;
+
   bool embedded_schema;
 
   // Outermost object currently being traversed.
@@ -375,20 +438,6 @@ public:
   //
   semantics::class_*& object;
 
-  struct db_type_type
-  {
-    db_type_type () {}
-    db_type_type (string const& t, string const& it)
-        : type (t), id_type (it)
-    {
-    }
-
-    string type;
-    string id_type;
-  };
-
-  typedef std::map<string, db_type_type> type_map_type;
-
   // Per-database customizable functionality.
   //
 protected:
@@ -420,27 +469,6 @@ protected:
                       semantics::context&,
                       column_type_flags);
 
-protected:
-  struct data
-  {
-    virtual
-    ~data () {}
-    data (std::ostream& os)
-        : os_ (os.rdbuf ()), top_object_ (0), object_ (0)
-    {
-    }
-
-  public:
-    std::ostream os_;
-    std::stack<std::streambuf*> os_stack_;
-
-    semantics::class_* top_object_;
-    semantics::class_* object_;
-
-    keyword_set_type keyword_set_;
-    type_map_type type_map_;
-  };
-
 public:
   typedef context root_context;
 
diff --git a/odb/generator.cxx b/odb/generator.cxx
index 60f8e2a..7fa6d6a 100644
--- a/odb/generator.cxx
+++ b/odb/generator.cxx
@@ -232,13 +232,7 @@ generate (options const& ops, semantics::unit& unit, path const& p)
 
     // Include settings.
     //
-    bool br (ops.include_with_brackets ());
-    string ip (ops.include_prefix ());
     string gp (ops.guard_prefix ());
-
-    if (!ip.empty () && ip[ip.size () - 1] != '/')
-      ip.append ("/");
-
     if (!gp.empty () && gp[gp.size () - 1] != '_')
       gp.append ("_");
 
@@ -275,8 +269,7 @@ generate (options const& ops, semantics::unit& unit, path const& p)
           << "// End prologue." << endl
           << endl;
 
-      hxx << "#include " << (br ? '<' : '"') << ip << file <<
-        (br ? '>' : '"') << endl
+      hxx << "#include " << ctx->process_include_path (file.string ()) << endl
           << endl;
 
       include::generate ();
@@ -297,8 +290,7 @@ generate (options const& ops, semantics::unit& unit, path const& p)
         }
       }
 
-      hxx << "#include " << (br ? '<' : '"') << ip << ixx_name <<
-        (br ? '>' : '"') << endl
+      hxx << "#include " << ctx->process_include_path (ixx_name) << endl
           << endl;
 
       // Copy epilogue.
@@ -375,8 +367,7 @@ generate (options const& ops, semantics::unit& unit, path const& p)
           << "// End prologue." << endl
           << endl;
 
-      cxx << "#include " << (br ? '<' : '"') << ip << hxx_name <<
-        (br ? '>' : '"') << endl
+      cxx << "#include " << ctx->process_include_path (hxx_name) << endl
           << endl;
 
       switch (ops.database ())
@@ -444,6 +435,12 @@ generate (options const& ops, semantics::unit& unit, path const& p)
     //
     throw failed ();
   }
+  catch (const re::format& e)
+  {
+    cerr << "error: invalid regex: '" << e.regex () << "': " <<
+      e.description () << endl;
+    throw failed ();
+  }
   catch (semantics::invalid_path const& e)
   {
     cerr << "error: '" << e.path () << "' is not a valid filesystem path"
diff --git a/odb/include.cxx b/odb/include.cxx
index de3be4a..8d70b64 100644
--- a/odb/include.cxx
+++ b/odb/include.cxx
@@ -606,9 +606,9 @@ namespace include
       f += ctx.options.odb_file_suffix ();
       f += ctx.options.hxx_suffix ();
 
-      ctx.os << "#include " <<
-        (inc->type_ == include_directive::quote ? '"' : '<') << f <<
-        (inc->type_ == include_directive::quote ? '"' : '>') << endl
+      char o (inc->type_ == include_directive::quote ? '"' : '<');
+      ctx.os << "#include " << ctx.process_include_path (
+        f.string (), false, o) << endl
              << endl;
     }
   }
diff --git a/odb/options.cli b/odb/options.cli
index d61d60f..a3595ce 100644
--- a/odb/options.cli
+++ b/odb/options.cli
@@ -343,6 +343,35 @@ class options
     "Add <prefix> to the generated \cb{#include} directive paths."
   };
 
+  std::vector<std::string> --include-regex
+  {
+    "<regex>",
+    "Add <regex> to the list of regular expressions used to transform
+     generated \cb{#include} directive paths. The argument to this option
+     is a Perl-like regular expression in the form
+     \c{\b{/}\i{pattern}\b{/}\i{replacement}\b{/}}. Any character can be
+     used as a delimiter instead of \cb{/} and the delimiter can be escaped
+     inside \ci{pattern} and \ci{replacement} with a backslash (\cb{\\}).
+     You can specify multiple regular expressions by repeating this option.
+     All the regular expressions are tried in the order specified and the
+     first expression that matches is used.
+
+     As an example, the following expression transforms include paths in
+     the form \cb{foo/bar-odb.h} to paths in the form
+     \cb{foo/generated/bar-odb.h}:
+
+     \cb{%foo/(.+)-odb.h%foo/generated/$1-odb.h%}
+
+     See also the REGEX AND SHELL QUOTING section below."
+  };
+
+  bool --include-regex-trace
+  {
+    "Trace the process of applying regular expressions specified with the
+     \cb{--include-regex} option. Use this option to find out why your
+     regular expressions don't do what you expected them to do."
+  };
+
   std::string --guard-prefix
   {
     "<prefix>",
-- 
cgit v1.1