about summary refs log tree commit diff
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2021-06-21 12:52:18 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2021-06-29 13:11:59 +0200
commita87c149915fa5a262bd797df1c2a4e756420d413 (patch)
tree620bfe6a33af12e84030139a8fa53d8ecf0de669
parent371ec576553528fc7b8532bf00673cb222836349 (diff)
Add support for bulk operations in PostgreSQL 14
-rw-r--r--NEWS7
-rw-r--r--doc/manual.xhtml98
-rw-r--r--odb/pragma.cxx2
-rw-r--r--odb/relational/pgsql/context.cxx2
-rw-r--r--odb/relational/pgsql/header.cxx14
-rw-r--r--odb/relational/pgsql/source.cxx6
-rw-r--r--odb/relational/source.cxx59
7 files changed, 122 insertions, 66 deletions
diff --git a/NEWS b/NEWS
index 5c63668..d6b2844 100644
--- a/NEWS
+++ b/NEWS
@@ -67,6 +67,13 @@ Version 2.5.0
* Database classes are now move-constructible. This means they can be
returned by value from a function in C++11.
+ * Support for bulk operations in PostgreSQL 14 using the new pipeline mode.
+ For details on bulk operations see Section 15.3, "Bulk Database Operations"
+ in the ODB manual. Note that while this functionality requires libpq
+ version 14 or later, it should be usable with PostgreSQL servers version
+ 7.4 or later. The development of this support was sponsored by Qube
+ Research & Technologies Limited.
+
* Support for SQLite ATTACH DATABASE. Attached databases are represented as
special odb::sqlite::database instances. @@ TODO: doc ref.
diff --git a/doc/manual.xhtml b/doc/manual.xhtml
index 9af0f33..f7fe095 100644
--- a/doc/manual.xhtml
+++ b/doc/manual.xhtml
@@ -774,6 +774,7 @@ for consistency.
<tr><th>19.5.4</th><td><a href="#19.5.4">Date-Time Format</a></td></tr>
<tr><th>19.5.5</th><td><a href="#19.5.5">Timezones</a></td></tr>
<tr><th>19.5.6</th><td><a href="#19.5.6"><code>NUMERIC</code> Type Support</a></td></tr>
+ <tr><th>19.5.7</th><td><a href="#19.5.7">Bulk Operations Support</a></td></tr>
</table>
</td>
</tr>
@@ -18705,23 +18706,24 @@ class derived: public string_base
<h2><a name="15.3">15.3 Bulk Database Operations</a></h2>
- <p>Some database systems supported by ODB provide a mechanism, often
- called bulk or batch statement execution, that allows us to execute
- the same SQL statement on multiple sets of data at once and with a
- single database API call. This often results in significantly
- better performance if we need to execute the same statement for a
- large number of data sets (thousands to millions).</p>
-
- <p>ODB translates this mechanism to bulk operations which allow
- us to persist, update, or erase a range of objects in the database.
- Currently, from all the database systems supported by ODB, only
- Oracle and Microsoft SQL Server are capable of bulk operations.
- There is also currently no emulation of the bulk API for other
- databases nor dynamic multi-database support. As a result, if
- you are using dynamic multi-database support, you will need to
- "drop down" to static support in order to access the bulk API.
- Refer to <a href="#16">Chapter 16, "Multi-Database Support"</a>
- for details.</p>
+ <p>Some database systems supported by ODB provide a mechanism, often called
+ bulk or batch statement execution, that allows us to execute the same SQL
+ statement on multiple sets of data at once and with a single database API
+ call (or equivalent). This often results in significantly better
+ performance if we need to execute the same statement for a large number
+ of data sets (thousands to millions).</p>
+
+ <p>ODB translates this mechanism to bulk operations which allow us to
+ persist, update, or erase a range of objects in the database. Currently,
+ from all the database systems supported by ODB, only Oracle, Microsoft
+ SQL Server, and PostgreSQL are capable of bulk operations (but
+ see <a href="#19.5.7">Section 19.5.7, "Bulk Operations Support"</a> for
+ PostgreSQL limitations). There is also currently no emulation of the bulk
+ API for other databases nor dynamic multi-database support. As a result,
+ if you are using dynamic multi-database support, you will need to "drop
+ down" to static support in order to access the bulk API. Refer
+ to <a href="#16">Chapter 16, "Multi-Database Support"</a> for
+ details.</p>
<p>As we will discuss later in this section, bulk operations have
complex failure semantics that is dictated by the underlying
@@ -18755,15 +18757,15 @@ class person
</pre>
<p>The single argument to the <code>bulk</code> pragma is the batch
- size. The batch size specifies the maximum number of data sets
- that should be handled with a single underlying statement execution.
- If the range that we want to perform the bulk operation on contains
- more objects than the batch size, then ODB will split this operation
- into multiple underlying statement executions (batches). To illustrate
- this point with an example, suppose we want to persist 53,000 objects
- and the batch size is 5,000. ODB will then execute the statement
- 11 times, the first 10 times with 5,000 data sets each, and the
- last time with the remaining 3,000 data sets.</p>
+ size. The batch size specifies the maximum number of data sets that
+ should be handled with a single underlying statement execution (or
+ equivalent). If the range that we want to perform the bulk operation on
+ contains more objects than the batch size, then ODB will split this
+ operation into multiple underlying statement executions (batches). To
+ illustrate this point with an example, suppose we want to persist 53,000
+ objects and the batch size is 5,000. ODB will then execute the statement
+ 11 times, the first 10 times with 5,000 data sets each, and the last time
+ with the remaining 3,000 data sets.</p>
<p>The commonly used batch sizes are in the 2,000-5,000 range, though
smaller or larger batches could provide better performance,
@@ -18780,7 +18782,7 @@ class person
by using the database prefix, for example:</p>
<pre class="cxx">
-#pragma db object mssql:bulk(3000) oracle:bulk(4000)
+#pragma db object mssql:bulk(3000) oracle:bulk(4000) pgsql:bulk(2000)
class person
{
...
@@ -18911,11 +18913,11 @@ db.erase&lt;person> (ids.begin (), ids.end ());
<p>Conceptually, a bulk operation is equivalent to performing the
corresponding non-bulk version in a loop, except when it comes to the
- failure semantics. Both databases that currently are capable of
- bulk operations (Oracle and SQL Server) do not stop when a data
+ failure semantics. Some databases that currently are capable of bulk
+ operations (specifically, Oracle and SQL Server) do not stop when a data
set in a batch fails (for example, because of a unique constraint
- violation). Instead, they continue executing subsequent data
- sets until every element in the batch has been attempted. The
+ violation). Instead, they continue executing subsequent data sets until
+ every element in the batch has been attempted. The
<code>continue_failed</code> argument in the bulk functions listed
above specifies whether ODB should extend this behavior and continue
with subsequent batches if the one it has tried to execute has failed
@@ -19042,20 +19044,19 @@ multiple exceptions, 4 elements attempted, 2 failed:
[3] 1: ORA-00001: unique constraint (ODB_TEST.person_last_i) violated
</pre>
- <p>Both databases that currently are capable of bulk operations return
- a total count of affected rows rather than individual counts for
- each data set. This limitation prevents ODB from being able to
- always determine which elements in the batch haven't affected
- any rows and, for the update and erase operations, translate
- this to the <code>object_not_persistent</code> exceptions. As
- a result, if some elements in the batch haven't affected any
- rows and ODB is unable to determine exactly which ones, it will mark
- all the elements in this batch as "maybe not persistent". That
- is, it will insert the <code>object_not_persistent</code> exception
- and set the <code>maybe</code> flag for every position in the
- batch. The diagnostics string returned by <code>what()</code>
- will also reflect this situation, for example (assuming batch
- size of 3):</p>
+ <p>Some databases that currently are capable of bulk operations
+ (specifically, Oracle and SQL Server) return a total count of affected
+ rows rather than individual counts for each data set. This limitation
+ prevents ODB from being able to always determine which elements in the
+ batch haven't affected any rows and, for the update and erase operations,
+ translate this to the <code>object_not_persistent</code> exceptions. As a
+ result, if some elements in the batch haven't affected any rows and ODB
+ is unable to determine exactly which ones, it will mark all the elements
+ in this batch as "maybe not persistent". That is, it will insert
+ the <code>object_not_persistent</code> exception and set
+ the <code>maybe</code> flag for every position in the batch. The
+ diagnostics string returned by <code>what()</code> will also reflect this
+ situation, for example (assuming batch size of 3):</p>
<pre class="terminal">
multiple exceptions, 4 elements attempted, 4 failed:
@@ -22848,6 +22849,13 @@ SHOW integer_datetimes
ones, as discussed in <a href="#14.8">Section 14.8, "Database
Type Mapping Pragmas"</a>.</p>
+ <h3><a name="19.5.7">19.5.7 Bulk Operations Support</a></h3>
+
+ <p>Support for bulk operations (<a href="#15.3">Section 15.3, "Bulk
+ Database Operations"</a>) requires PostgreSQL client library
+ (<code>libpq</code>) version 14 or later and PostgreSQL server
+ version 7.4 or later.</p>
+
<h2><a name="19.6">19.6 PostgreSQL Index Definitions</a></h2>
diff --git a/odb/pragma.cxx b/odb/pragma.cxx
index bd1d848..6668733 100644
--- a/odb/pragma.cxx
+++ b/odb/pragma.cxx
@@ -1643,8 +1643,6 @@ handle_pragma (cxx_lexer& l,
return;
}
- // base
- //
if (l.next (tl, &tn) != CPP_NUMBER || TREE_CODE (tn) != INTEGER_CST)
{
error (l) << "unsigned integer expected as batch size" << endl;
diff --git a/odb/relational/pgsql/context.cxx b/odb/relational/pgsql/context.cxx
index a9f34dd..7f99f5d 100644
--- a/odb/relational/pgsql/context.cxx
+++ b/odb/relational/pgsql/context.cxx
@@ -85,7 +85,7 @@ namespace relational
insert_send_auto_id = false;
delay_freeing_statement_result = false;
need_image_clone = false;
- generate_bulk = false;
+ generate_bulk = true;
global_index = true;
global_fkey = false;
data_->bind_vector_ = "pgsql::bind*";
diff --git a/odb/relational/pgsql/header.cxx b/odb/relational/pgsql/header.cxx
index ff00eaa..c3efc3e 100644
--- a/odb/relational/pgsql/header.cxx
+++ b/odb/relational/pgsql/header.cxx
@@ -87,6 +87,20 @@ namespace relational
}
os << endl;
+
+ if (poly_derived)
+ return;
+
+ // Bulk operations batch size.
+ //
+ {
+ unsigned long long b (c.count ("bulk")
+ ? c.get<unsigned long long> ("bulk")
+ : 1);
+
+ os << "static const std::size_t batch = " << b << "UL;"
+ << endl;
+ }
}
virtual void
diff --git a/odb/relational/pgsql/source.cxx b/odb/relational/pgsql/source.cxx
index 580103d..b881e48 100644
--- a/odb/relational/pgsql/source.cxx
+++ b/odb/relational/pgsql/source.cxx
@@ -203,7 +203,7 @@ namespace relational
traverse_numeric (member_info& mi)
{
os << b << ".type = pgsql::bind::numeric;"
- << b << ".buffer = " << arg << "." << mi.var << "value.data ();"
+ << b << ".buffer = " << arg << "." << mi.var << "value.data_ptr ();"
<< b << ".capacity = " << arg << "." << mi.var <<
"value.capacity ();"
<< b << ".size = &" << arg << "." << mi.var << "size;"
@@ -224,7 +224,7 @@ namespace relational
{
os << b << ".type = " <<
char_bin_buffer_types[mi.st->type - sql_type::CHAR] << ";"
- << b << ".buffer = " << arg << "." << mi.var << "value.data ();"
+ << b << ".buffer = " << arg << "." << mi.var << "value.data_ptr ();"
<< b << ".capacity = " << arg << "." << mi.var <<
"value.capacity ();"
<< b << ".size = &" << arg << "." << mi.var << "size;"
@@ -245,7 +245,7 @@ namespace relational
traverse_varbit (member_info& mi)
{
os << b << ".type = pgsql::bind::varbit;"
- << b << ".buffer = " << arg << "." << mi.var << "value.data ();"
+ << b << ".buffer = " << arg << "." << mi.var << "value.data_ptr ();"
<< b << ".capacity = " << arg << "." << mi.var <<
"value.capacity ();"
<< b << ".size = &" << arg << "." << mi.var << "size;"
diff --git a/odb/relational/source.cxx b/odb/relational/source.cxx
index d35a827..e00626a 100644
--- a/odb/relational/source.cxx
+++ b/odb/relational/source.cxx
@@ -1172,10 +1172,10 @@ traverse_object (type& c)
<< "{";
if (poly)
- os << "ODB_POTENTIALLY_UNUSED (top);";
+ os << "ODB_POTENTIALLY_UNUSED (top);"
+ << endl;
- os << endl
- << "using namespace " << db << ";"
+ os << "using namespace " << db << ";"
<< endl;
if (poly)
@@ -1461,13 +1461,25 @@ traverse_object (type& c)
<< "{"
<< "const object_type& obj (*objs[i]);"
<< "callback (db, obj, callback_event::pre_persist);"
- //@@ assumption: generate_grow is false
+ //@@ assumption: generate_grow is false or it only affects select (like
+ // in pgsql) so all we have to do is to increment image
+ // version if it grew.
//@@ assumption: insert_send_auto_id is false
- << "init (sts.image (i), obj, statement_insert" <<
- (versioned ? ", svm" : "") << ");"
+ << "image_type& im (sts.image (i));";
+
+ if (generate_grow)
+ os << "if (";
+
+ os << "init (im, obj, statement_insert" << (versioned ? ", svm" : "") << ")";
+
+ if (generate_grow)
+ os << " && i == 0)" << endl
+ << "im.version++";
+
+ os << ";"
<< "}";
- //@@ assumption: generate_grow is false
+ //@@ assumption: generate_grow: as above
os << "binding& imb (sts.insert_image_binding ());"
<< "if (imb.version == 0)"
<< "{"
@@ -1483,7 +1495,7 @@ traverse_object (type& c)
if (bv || auto_id)
{
os << "binding& idb (sts.id_image_binding ());"
- //@@ assumption: generate_grow is false
+ //@@ assumption: generate_grow: as above
<< "if (idb.version == 0)"
<< "{"
<< "bind (idb.bind, sts.id_image ());"
@@ -2212,9 +2224,24 @@ traverse_object (type& c)
if (opt != 0)
os << "const version_type& v (version (obj));";
- os << "init (sts.id_image (i), id (obj)" << (opt != 0 ? ", &v" : "") << ");"
- //@@ assumption: generate_grow false
- << "init (sts.image (i), obj, statement_update);"
+ os << "init (sts.id_image (i), id (obj)" << (opt != 0 ? ", &v" : "") << ");";
+
+ //@@ assumption: generate_grow is false or it only affects select (like
+ // in pgsql) so all we have to do is to increment image
+ // version if it grew.
+
+ os << "image_type& im (sts.image (i));";
+
+ if (generate_grow)
+ os << "if (";
+
+ os << "init (im, obj, statement_update" << (versioned ? ", svm" : "") << ")";
+
+ if (generate_grow)
+ os << " && i == 0)" << endl
+ << "im.version++";
+
+ os << ";"
<< "}";
// Update bindings.
@@ -2223,7 +2250,7 @@ traverse_object (type& c)
<< "binding& imb (sts.update_image_binding ());"
<< endl;
- //@@ assumption: generate_grow false
+ //@@ assumption: generate_grow: as above
//
os << "bool u (false);" // Avoid incrementing version twice.
<< "if (imb.version == 0)"
@@ -2234,7 +2261,7 @@ traverse_object (type& c)
<< "u = true;"
<< "}";
- //@@ assumption: generate_grow false
+ //@@ assumption: generate_grow: as above
//
os << "if (idb.version == 0)"
<< "{"
@@ -2452,7 +2479,8 @@ traverse_object (type& c)
<< "init (sts.id_image (i), *ids[i]);"
<< endl
<< "binding& idb (sts.id_image_binding ());"
- //@@ assumption: generate_grow false
+ //@@ assumption: generate_grow is false or it only affects select (like
+ // in pgsql).
<< "if (idb.version == 0)"
<< "{"
<< "bind (idb.bind, sts.id_image ());"
@@ -2836,7 +2864,8 @@ traverse_object (type& c)
<< "}";
os << "binding& idb (sts.id_image_binding ());"
- //@@ assumption: generate_grow false
+ //@@ assumption: generate_grow is false or it only affects select
+ // (like in pgsql).
<< "if (idb.version == 0)"
<< "{"
<< "bind (idb.bind, sts.id_image ());"