From a87c149915fa5a262bd797df1c2a4e756420d413 Mon Sep 17 00:00:00 2001
From: Boris Kolpackov Some database systems supported by ODB provide a mechanism, often
- called bulk or batch statement execution, that allows us to execute
- the same SQL statement on multiple sets of data at once and with a
- single database API call. This often results in significantly
- better performance if we need to execute the same statement for a
- large number of data sets (thousands to millions). ODB translates this mechanism to bulk operations which allow
- us to persist, update, or erase a range of objects in the database.
- Currently, from all the database systems supported by ODB, only
- Oracle and Microsoft SQL Server are capable of bulk operations.
- There is also currently no emulation of the bulk API for other
- databases nor dynamic multi-database support. As a result, if
- you are using dynamic multi-database support, you will need to
- "drop down" to static support in order to access the bulk API.
- Refer to Chapter 16, "Multi-Database Support"
- for details.

Some database systems supported by ODB provide a mechanism, often called
+ bulk or batch statement execution, that allows us to execute the same SQL
+ statement on multiple sets of data at once and with a single database API
+ call (or equivalent). This often results in significantly better
+ performance if we need to execute the same statement for a large number
+ of data sets (thousands to millions). ODB translates this mechanism to bulk operations which allow us to
+ persist, update, or erase a range of objects in the database. Currently,
+ of all the database systems supported by ODB, only Oracle, Microsoft
+ SQL Server, and PostgreSQL are capable of bulk operations (but
+ see Section 19.5.7, "Bulk Operations Support" for
+ PostgreSQL limitations). There is also currently no emulation of the bulk
+ API for other databases nor dynamic multi-database support. As a result,
+ if you are using dynamic multi-database support, you will need to "drop
+ down" to static support in order to access the bulk API. Refer
+ to Chapter 16, "Multi-Database Support" for
+ details. As we will discuss later in this section, bulk operations have
complex failure semantics that is dictated by the underlying
@@ -18755,15 +18757,15 @@ class person
The single argument to the 19.5.4 Date-Time Format 19.5.5 Timezones
+ 19.5.6 NUMERIC
Type Support
@@ -18705,23 +18706,24 @@ class derived: public string_base
19.5.7 Bulk Operations Support 15.3 Bulk Database Operations
- bulk
pragma is the batch
- size. The batch size specifies the maximum number of data sets
- that should be handled with a single underlying statement execution.
- If the range that we want to perform the bulk operation on contains
- more objects than the batch size, then ODB will split this operation
- into multiple underlying statement executions (batches). To illustrate
- this point with an example, suppose we want to persist 53,000 objects
- and the batch size is 5,000. ODB will then execute the statement
- 11 times, the first 10 times with 5,000 data sets each, and the
- last time with the remaining 3,000 data sets.
The commonly used batch sizes are in the 2,000-5,000 range, though smaller or larger batches could provide better performance, @@ -18780,7 +18782,7 @@ class person by using the database prefix, for example:
-#pragma db object mssql:bulk(3000) oracle:bulk(4000) +#pragma db object mssql:bulk(3000) oracle:bulk(4000) pgsql:bulk(2000) class person { ... @@ -18911,11 +18913,11 @@ db.erase<person> (ids.begin (), ids.end ());-Conceptually, a bulk operation is equivalent to performing the corresponding non-bulk version in a loop, except when it comes to the - failure semantics. Both databases that currently are capable of - bulk operations (Oracle and SQL Server) do not stop when a data + failure semantics. Some databases that currently are capable of bulk + operations (specifically, Oracle and SQL Server) do not stop when a data set in a batch fails (for example, because of a unique constraint - violation). Instead, they continue executing subsequent data - sets until every element in the batch has been attempted. The + violation). Instead, they continue executing subsequent data sets until + every element in the batch has been attempted. The
continue_failed
argument in the bulk functions listed above specifies whether ODB should extend this behavior and continue with subsequent batches if the one it has tried to execute has failed @@ -19042,20 +19044,19 @@ multiple exceptions, 4 elements attempted, 2 failed: [3] 1: ORA-00001: unique constraint (ODB_TEST.person_last_i) violated
Both databases that currently are capable of bulk operations return
- a total count of affected rows rather than individual counts for
- each data set. This limitation prevents ODB from being able to
- always determine which elements in the batch haven't affected
- any rows and, for the update and erase operations, translate
- this to the object_not_persistent
exceptions. As
- a result, if some elements in the batch haven't affected any
- rows and ODB is unable to determine exactly which ones, it will mark
- all the elements in this batch as "maybe not persistent". That
- is, it will insert the object_not_persistent
exception
- and set the maybe
flag for every position in the
- batch. The diagnostics string returned by what()
- will also reflect this situation, for example (assuming batch
- size of 3):
Some databases that currently are capable of bulk operations
+ (specifically, Oracle and SQL Server) return a total count of affected
+ rows rather than individual counts for each data set. This limitation
+ prevents ODB from being able to always determine which elements in the
+ batch haven't affected any rows and, for the update and erase operations,
+ translate this to the object_not_persistent
exceptions. As a
+ result, if some elements in the batch haven't affected any rows and ODB
+ is unable to determine exactly which ones, it will mark all the elements
+ in this batch as "maybe not persistent". That is, it will insert
+ the object_not_persistent
exception and set
+ the maybe
flag for every position in the batch. The
+ diagnostics string returned by what()
will also reflect this
+ situation, for example (assuming batch size of 3):
multiple exceptions, 4 elements attempted, 4 failed: @@ -22848,6 +22849,13 @@ SHOW integer_datetimes ones, as discussed in Section 14.8, "Database Type Mapping Pragmas". +19.5.7 Bulk Operations Support
+ +Support for bulk operations (Section 15.3, "Bulk + Database Operations") requires the PostgreSQL client library + (
+libpq
) version 14 or later and PostgreSQL server + version 7.4 or later.

19.6 PostgreSQL Index Definitions
diff --git a/odb/pragma.cxx b/odb/pragma.cxx index bd1d848..6668733 100644 --- a/odb/pragma.cxx +++ b/odb/pragma.cxx @@ -1643,8 +1643,6 @@ handle_pragma (cxx_lexer& l, return; } - // base - // if (l.next (tl, &tn) != CPP_NUMBER || TREE_CODE (tn) != INTEGER_CST) { error (l) << "unsigned integer expected as batch size" << endl; diff --git a/odb/relational/pgsql/context.cxx b/odb/relational/pgsql/context.cxx index a9f34dd..7f99f5d 100644 --- a/odb/relational/pgsql/context.cxx +++ b/odb/relational/pgsql/context.cxx @@ -85,7 +85,7 @@ namespace relational insert_send_auto_id = false; delay_freeing_statement_result = false; need_image_clone = false; - generate_bulk = false; + generate_bulk = true; global_index = true; global_fkey = false; data_->bind_vector_ = "pgsql::bind*"; diff --git a/odb/relational/pgsql/header.cxx b/odb/relational/pgsql/header.cxx index ff00eaa..c3efc3e 100644 --- a/odb/relational/pgsql/header.cxx +++ b/odb/relational/pgsql/header.cxx @@ -87,6 +87,20 @@ namespace relational } os << endl; + + if (poly_derived) + return; + + // Bulk operations batch size. + // + { + unsigned long long b (c.count ("bulk") + ? c.get("bulk") + : 1); + + os << "static const std::size_t batch = " << b << "UL;" + << endl; + } } virtual void diff --git a/odb/relational/pgsql/source.cxx b/odb/relational/pgsql/source.cxx index 580103d..b881e48 100644 --- a/odb/relational/pgsql/source.cxx +++ b/odb/relational/pgsql/source.cxx @@ -203,7 +203,7 @@ namespace relational traverse_numeric (member_info& mi) { os << b << ".type = pgsql::bind::numeric;" - << b << ".buffer = " << arg << "." << mi.var << "value.data ();" + << b << ".buffer = " << arg << "." << mi.var << "value.data_ptr ();" << b << ".capacity = " << arg << "." << mi.var << "value.capacity ();" << b << ".size = &" << arg << "." << mi.var << "size;" @@ -224,7 +224,7 @@ namespace relational { os << b << ".type = " << char_bin_buffer_types[mi.st->type - sql_type::CHAR] << ";" - << b << ".buffer = " << arg << "." 
<< mi.var << "value.data ();" + << b << ".buffer = " << arg << "." << mi.var << "value.data_ptr ();" << b << ".capacity = " << arg << "." << mi.var << "value.capacity ();" << b << ".size = &" << arg << "." << mi.var << "size;" @@ -245,7 +245,7 @@ namespace relational traverse_varbit (member_info& mi) { os << b << ".type = pgsql::bind::varbit;" - << b << ".buffer = " << arg << "." << mi.var << "value.data ();" + << b << ".buffer = " << arg << "." << mi.var << "value.data_ptr ();" << b << ".capacity = " << arg << "." << mi.var << "value.capacity ();" << b << ".size = &" << arg << "." << mi.var << "size;" diff --git a/odb/relational/source.cxx b/odb/relational/source.cxx index d35a827..e00626a 100644 --- a/odb/relational/source.cxx +++ b/odb/relational/source.cxx @@ -1172,10 +1172,10 @@ traverse_object (type& c) << "{"; if (poly) - os << "ODB_POTENTIALLY_UNUSED (top);"; + os << "ODB_POTENTIALLY_UNUSED (top);" + << endl; - os << endl - << "using namespace " << db << ";" + os << "using namespace " << db << ";" << endl; if (poly) @@ -1461,13 +1461,25 @@ traverse_object (type& c) << "{" << "const object_type& obj (*objs[i]);" << "callback (db, obj, callback_event::pre_persist);" - //@@ assumption: generate_grow is false + //@@ assumption: generate_grow is false or it only affects select (like + // in pgsql) so all we have to do is to increment image + // version if it grew. //@@ assumption: insert_send_auto_id is false - << "init (sts.image (i), obj, statement_insert" << - (versioned ? ", svm" : "") << ");" + << "image_type& im (sts.image (i));"; + + if (generate_grow) + os << "if ("; + + os << "init (im, obj, statement_insert" << (versioned ? 
", svm" : "") << ")"; + + if (generate_grow) + os << " && i == 0)" << endl + << "im.version++"; + + os << ";" << "}"; - //@@ assumption: generate_grow is false + //@@ assumption: generate_grow: as above os << "binding& imb (sts.insert_image_binding ());" << "if (imb.version == 0)" << "{" @@ -1483,7 +1495,7 @@ traverse_object (type& c) if (bv || auto_id) { os << "binding& idb (sts.id_image_binding ());" - //@@ assumption: generate_grow is false + //@@ assumption: generate_grow: as above << "if (idb.version == 0)" << "{" << "bind (idb.bind, sts.id_image ());" @@ -2212,9 +2224,24 @@ traverse_object (type& c) if (opt != 0) os << "const version_type& v (version (obj));"; - os << "init (sts.id_image (i), id (obj)" << (opt != 0 ? ", &v" : "") << ");" - //@@ assumption: generate_grow false - << "init (sts.image (i), obj, statement_update);" + os << "init (sts.id_image (i), id (obj)" << (opt != 0 ? ", &v" : "") << ");"; + + //@@ assumption: generate_grow is false or it only affects select (like + // in pgsql) so all we have to do is to increment image + // version if it grew. + + os << "image_type& im (sts.image (i));"; + + if (generate_grow) + os << "if ("; + + os << "init (im, obj, statement_update" << (versioned ? ", svm" : "") << ")"; + + if (generate_grow) + os << " && i == 0)" << endl + << "im.version++"; + + os << ";" << "}"; // Update bindings. @@ -2223,7 +2250,7 @@ traverse_object (type& c) << "binding& imb (sts.update_image_binding ());" << endl; - //@@ assumption: generate_grow false + //@@ assumption: generate_grow: as above // os << "bool u (false);" // Avoid incrementing version twice. 
<< "if (imb.version == 0)" @@ -2234,7 +2261,7 @@ traverse_object (type& c) << "u = true;" << "}"; - //@@ assumption: generate_grow false + //@@ assumption: generate_grow: as above // os << "if (idb.version == 0)" << "{" @@ -2452,7 +2479,8 @@ traverse_object (type& c) << "init (sts.id_image (i), *ids[i]);" << endl << "binding& idb (sts.id_image_binding ());" - //@@ assumption: generate_grow false + //@@ assumption: generate_grow is false or it only affects select (like + // in pgsql). << "if (idb.version == 0)" << "{" << "bind (idb.bind, sts.id_image ());" @@ -2836,7 +2864,8 @@ traverse_object (type& c) << "}"; os << "binding& idb (sts.id_image_binding ());" - //@@ assumption: generate_grow false + //@@ assumption: generate_grow is false or it only affects select + // (like in pgsql). << "if (idb.version == 0)" << "{" << "bind (idb.bind, sts.id_image ());" -- cgit v1.1