From 74f50a3bc6890c96532fe9e76e387171aec3a8c3 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Tue, 27 Aug 2013 08:22:03 +0200 Subject: Add support for statement processing --- odb/makefile | 31 +- odb/statement-processing.cxx | 770 +++++++++++++++++++++++++++++++++++++++++++ odb/statement.hxx | 63 ++++ 3 files changed, 849 insertions(+), 15 deletions(-) create mode 100644 odb/statement-processing.cxx diff --git a/odb/makefile b/odb/makefile index 5a58103..25ac250 100644 --- a/odb/makefile +++ b/odb/makefile @@ -4,21 +4,22 @@ include $(dir $(lastword $(MAKEFILE_LIST)))../build/bootstrap.make -cxx := \ -callback.cxx \ -exceptions.cxx \ -database.cxx \ -vector-impl.cxx \ -connection.cxx \ -lazy-ptr-impl.cxx \ -prepared-query.cxx \ -query-dynamic.cxx \ -result.cxx \ -schema-catalog.cxx \ -section.cxx \ -session.cxx \ -statement.cxx \ -tracer.cxx \ +cxx := \ +callback.cxx \ +exceptions.cxx \ +database.cxx \ +vector-impl.cxx \ +connection.cxx \ +lazy-ptr-impl.cxx \ +prepared-query.cxx \ +query-dynamic.cxx \ +result.cxx \ +schema-catalog.cxx \ +section.cxx \ +session.cxx \ +statement.cxx \ +statement-processing.cxx \ +tracer.cxx \ transaction.cxx # Implementation details. diff --git a/odb/statement-processing.cxx b/odb/statement-processing.cxx new file mode 100644 index 0000000..f647966 --- /dev/null +++ b/odb/statement-processing.cxx @@ -0,0 +1,770 @@ +// file : odb/statement-processing.cxx +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : GNU GPL v2; see accompanying LICENSE file + +// Place the statement processing code into a separate source file +// to minimize statically-linked object code size when processing +// is not used. + +#include + +#include + +using namespace std; + +// #define LIBODB_DEBUG_STATEMENT_PROCESSING 1 + +namespace odb +{ + typedef char_traits traits; + typedef const void* const* bind_type; + + static inline const void* + bind_at (size_t i, bind_type bind, size_t bind_skip) + { + const char* b (reinterpret_cast (bind)); + return *reinterpret_cast (b + i * bind_skip); + } + + static inline const char* + find (const char* b, const char* e, char c) + { + return traits::find (b, e - b, c); + } + + static inline const char* + rfind (const char* b, const char* e, char c) + { + for (--e; b != e; --e) + if (*e == c) + return e; + + return 0; + } + + // Iterate over INSERT column/value list, UPDATE SET expression list, + // or SELECT column/join list. + // + // for (const char* b (columns_begin), *e (begin (b, end)); + // e != 0; + // next (b, e, end)) + // { + // // b points to the beginning of the value (i.e., one past '('). + // // e points one past the end of the value (i.e., to ',', ')', or '\n'). + // } + // + // // b points one past the last value. + // + static inline const char* + paren_begin (const char*& b, const char* end) + { + // Note that the list may not end with '\n'. + + b++; // Skip '('. + const char* e (find (b, end, '\n')); + return (e != 0 ? e : end) - 1; // Skip ',' or ')'. + } + + static inline void + paren_next (const char*& b, const char*& e, const char* end) + { + if (*e == ',') + { + b = e + 2; // Skip past '\n'. + e = find (b, end, '\n'); + e = (e != 0 ? e : end) - 1; // Skip ',' or ')'. + } + else + { + b = (e + 1 != end ? e + 2 : end); // Skip past '\n'. + e = 0; + } + } + + static inline const char* + comma_begin (const char* b, const char* end) + { + // Note that the list may not end with '\n'. + + const char* e (find (b, end, '\n')); + return e != 0 ? e - (*(e - 1) == ',' ? 1 : 0) : end; // Skip ','. + } + + static inline void + comma_next (const char*& b, const char*& e, const char* end) + { + if (*e == ',') + { + b = e + 2; // Skip past '\n'. + e = find (b, end, '\n'); + e = (e != 0 ? e - (*(e - 1) == ',' ? 1 : 0) : end); // Skip ','. + } + else + { + b = (e != end ? e + 1 : end); // Skip past '\n'. + e = 0; + } + } + + static inline const char* + newline_begin (const char* b, const char* end) + { + // Note that the list may not end with '\n'. + + const char* e (find (b, end, '\n')); + return e != 0 ? e : end; + } + + static inline void + newline_next (const char*& b, + const char*& e, + const char* end, + const char* prefix, + size_t prefix_size) + { + if (e != end) + e++; // Skip past '\n'. + + b = e; + + // Do we have another element? + // + if (static_cast (end - b) > prefix_size && + traits::compare (b, prefix, prefix_size) == 0) + { + e = find (b, end, '\n'); + if (e == 0) + e = end; + } + else + e = 0; + } + + // Note that end must point to the beginning of the list. + // + static inline const char* + newline_rbegin (const char* e, const char* end) + { + const char* b (rfind (end, e - 1, '\n')); + return b != 0 ? b + 1 : end; // Skip past '\n'. + } + + static inline void + newline_rnext (const char*& b, const char*& e, const char* end) + { + if (b != end) + { + e = b - 1; // Skip to previous '\n'. + b = rfind (end, e - 1, '\n'); + b = (b != 0 ? b + 1 : end); // Skip past '\n'. + } + else + { + e = end - 1; // One before the first element. + b = 0; + } + } + + // Fast path: just remove the "structure". + // + static inline void + process_fast (const char* s, string& r) + { + r = s; + for (size_t i (r.find ('\n')); i != string::npos; i = r.find ('\n', i)) + r[i++] = ' '; + } + + void statement:: + process_insert (const char* s, + bind_type bind, + size_t bind_size, + size_t bind_skip, + char param_symbol, + string& r) + { +#ifndef LIBODB_DEBUG_STATEMENT_PROCESSING + assert (bind_size != 0); // Cannot be versioned. +#endif + + bool fast (true); // Fast case (if all present). + for (size_t i (0); i != bind_size && fast; ++i) + { + if (bind_at (i, bind, bind_skip) == 0) + fast = false; + } + + // Fast path: just remove the "structure". + // +#ifndef LIBODB_DEBUG_STATEMENT_PROCESSING + if (fast) + { + process_fast (s, r); + return; + } +#endif + + // Scan the statement and store the positions of various parts. + // + size_t n (traits::length (s)); + const char* e (s + n); + + // Header. + // + const char* p (find (s, e, '\n')); + assert (p != 0); + size_t header_size (p - s); + p++; + + // Column list. + // + const char* columns_begin (0); + if (*p == '(') + { + columns_begin = p; + + // Find the end of the column list. + // + for (const char* ce (paren_begin (p, e)); ce != 0; paren_next (p, ce, e)) + ; + } + + // OUTPUT + // + const char* output_begin (0); + size_t output_size (0); + if (e - p > 7 && traits::compare (p, "OUTPUT ", 7) == 0) + { + output_begin = p; + p += 7; + p = find (p, e, '\n'); + assert (p != 0); + output_size = p - output_begin; + p++; + } + + // VALUES or DEFAULT VALUES + // + bool empty (true); // DEFAULT VALUES case (if none present). + const char* values_begin (0); + if (e - p > 7 && traits::compare (p, "VALUES\n", 7) == 0) + { + p += 7; + values_begin = p; + + size_t bi (0); + for (const char* ve (paren_begin (p, e)); ve != 0; paren_next (p, ve, e)) + { + // We cannot be empty if we have a non-parameterized value, e.g., + // INSERT ... VALUES(1,?). We also cannot be empty if this value + // is present in the bind array. + // + if (find (p, ve, param_symbol) == 0 || + bind_at (bi++, bind, bind_skip) != 0) + empty = false; + } + } + else + { + // Must be DEFAULT VALUES. + // + assert (traits::compare (p, "DEFAULT VALUES", 14) == 0); + p += 14; + + if (*p == '\n') + p++; + } + + // Trailer. + // + const char* trailer_begin (0); + size_t trailer_size (0); + if (e - p != 0) + { + trailer_begin = p; + trailer_size = e - p; + } + + // Empty case. + // + if (empty) + { + r.reserve (header_size + + (output_size == 0 ? 0 : output_size + 1) + + 15 + // For " DEFAULT VALUES" + (trailer_size == 0 ? 0 : trailer_size + 1)); + + r.assign (s, header_size); + + if (output_size != 0) + { + r += ' '; + r.append (output_begin, output_size); + } + + r += " DEFAULT VALUES"; + + if (trailer_size != 0) + { + r += ' '; + r.append (trailer_begin, trailer_size); + } + + return; + } + + // Assume the same size as the original. It can only shrink, and in + // most cases only slightly. So this is a good approximation. + // + r.reserve (n); + r.assign (s, header_size); + + // Column list. + // + { + r += ' '; + + size_t i (0), bi (0); + + for (const char *c (columns_begin), *ce (paren_begin (c, e)), + *v (values_begin), *ve (paren_begin (v, e)); + ce != 0; paren_next (c, ce, e), paren_next (v, ve, e)) + { + // See if the value contains the parameter symbol and, if so, + // whether it is present in the bind array. + // + if (find (v, ve, param_symbol) != 0 && + bind_at (bi++, bind, bind_skip) == 0) + continue; + + // Append the column. + // + if (i++ == 0) + r += '('; + else + r += ", "; // Add the space for consistency with the fast path. + + r.append (c, ce - c); + } + + r += ')'; + } + + // OUTPUT + // + if (output_size != 0) + { + r += ' '; + r.append (output_begin, output_size); + } + + // Value list. + // + { + r += " VALUES "; + + size_t i (0), bi (0); + + for (const char* v (values_begin), *ve (paren_begin (v, e)); + ve != 0; paren_next (v, ve, e)) + { + // See if the value contains the parameter symbol and, if so, + // whether it is present in the bind array. + // + if (find (v, ve, param_symbol) != 0 && + bind_at (bi++, bind, bind_skip) == 0) + continue; + + // Append the value. + // + if (i++ == 0) + r += '('; + else + r += ", "; // Add the space for consistency with the fast path. + + r.append (v, ve - v); + } + + r += ')'; + } + + // Trailer. + // + if (trailer_size != 0) + { + r += ' '; + r.append (trailer_begin, trailer_size); + } + } + + void statement:: + process_update (const char* s, + bind_type bind, + size_t bind_size, + size_t bind_skip, + char param_symbol, + string& r) + { + bool fast (true); // Fast case (if all present). + for (size_t i (0); i != bind_size && fast; ++i) + { + if (bind_at (i, bind, bind_skip) == 0) + fast = false; + } + + // Fast path: just remove the "structure". + // +#ifndef LIBODB_DEBUG_STATEMENT_PROCESSING + if (fast) + { + process_fast (s, r); + return; + } +#endif + + // Scan the statement and store the positions of various parts. + // + size_t n (traits::length (s)); + const char* e (s + n); + + // Header. + // + const char* p (find (s, e, '\n')); + assert (p != 0); + size_t header_size (p - s); + p++; + + // SET + // + bool empty (true); // Empty SET case. + const char* set_begin (0); + if (e - p > 4 && traits::compare (p, "SET\n", 4) == 0) + { + p += 4; + set_begin = p; + + // Scan the SET list. + // + size_t bi (0); + for (const char* pe (comma_begin (p, e)); pe != 0; comma_next (p, pe, e)) + { + if (empty) + { + // We cannot be empty if we have a non-parameterized set expression, + // e.g., UPDATE ... SET ver=ver+1 ... We also cannot be empty if + // this expression is present in the bind array. + // + if (find (p, pe, param_symbol) == 0 || + bind_at (bi++, bind, bind_skip) != 0) + empty = false; + } + } + } + + // Trailer. + // + const char* trailer_begin (0); + size_t trailer_size (0); + if (e - p != 0) + { + trailer_begin = p; + trailer_size = e - p; + } + + // Empty case. + // + if (empty) + { + r.reserve (header_size + (trailer_size == 0 ? 0 : trailer_size + 1)); + r.assign (s, header_size); + + if (trailer_size != 0) + { + r += ' '; + r.append (trailer_begin, trailer_size); + } + + return; + } + + // Assume the same size as the original. It can only shrink, and in + // most cases only slightly. So this is a good approximation. + // + r.reserve (n); + r.assign (s, header_size); + + // SET list. + // + { + r += " SET "; + + size_t i (0), bi (0); + + for (const char* p (set_begin), *pe (comma_begin (p, e)); + pe != 0; comma_next (p, pe, e)) + { + // See if the value contains the parameter symbol and, if so, + // whether it is present in the bind array. + // + if (find (p, pe, param_symbol) != 0 && + bind_at (bi++, bind, bind_skip) == 0) + continue; + + // Append the expression. + // + if (i++ != 0) + r += ", "; // Add the space for consistency with the fast path. + + r.append (p, pe - p); + } + } + + // Trailer. + // + if (trailer_size != 0) + { + r += ' '; + r.append (trailer_begin, trailer_size); + } + } + + void statement:: + process_select (const char* s, + bind_type bind, + size_t bind_size, + size_t bind_skip, + char quote_open, + char quote_close, +#ifndef LIBODB_DEBUG_STATEMENT_PROCESSING + bool optimize, +#else + bool, +#endif + string& r) + { + bool fast (true); // Fast case (if all present). + for (size_t i (0); i != bind_size && fast; ++i) + { + if (bind_at (i, bind, bind_skip) == 0) + fast = false; + } + + // Fast path: just remove the "structure". + // +#ifndef LIBODB_DEBUG_STATEMENT_PROCESSING + if (fast && !optimize) + { + process_fast (s, r); + return; + } +#endif + + // Scan the statement and store the positions of various parts. + // + size_t n (traits::length (s)); + const char* e (s + n); + + // Header. + // + const char* p (find (s, e, '\n')); + assert (p != 0); + size_t header_size (p - s); + p++; + + // Column list. + // + const char* columns_begin (p); + for (const char* ce (comma_begin (p, e)); ce != 0; comma_next (p, ce, e)) + ; + + // FROM. + assert (traits::compare (p, "FROM ", 5) == 0); + const char* from_begin (p); + p = find (p, e, '\n'); // May not end with '\n'. + if (p == 0) + p = e; + size_t from_size (p - from_begin); + if (p != e) + p++; + + + // JOIN list. + // + const char* joins_begin (0), *joins_end (0); + if (e - p > 5 && traits::compare (p, "LEFT JOIN ", 10) == 0) + { + joins_begin = p; + + // Find the end of the JOIN list. + // + for (const char* je (newline_begin (p, e)); + je != 0; newline_next (p, je, e, "LEFT JOIN ", 10)) + ; + + joins_end = (p != e ? p - 1 : p); + } + +#ifndef LIBODB_DEBUG_STATEMENT_PROCESSING + if (fast && joins_begin == 0) + { + // No JOINs to optimize so can still take the fast path. + // + process_fast (s, r); + return; + } +#endif + + // Trailer (WHERE, ORDER BY, etc). + // + const char* trailer_begin (0); + size_t trailer_size (0); + if (e - p != 0) + { + trailer_begin = p; + trailer_size = e - p; + } + + // Assume the same size as the original. It can only shrink, and in + // most cases only slightly. So this is a good approximation. + // + r.reserve (n); + r.assign (s, header_size); + + // Column list. + // + { + r += ' '; + + size_t i (0), bi (0); + + for (const char *c (columns_begin), *ce (comma_begin (c, e)); + ce != 0; comma_next (c, ce, e)) + { + // See if the column is present in the bind array. + // + if (bind_at (bi++, bind, bind_skip) == 0) + continue; + + // Append the column. + // + if (i++ != 0) + r += ", "; // Add the space for consistency with the fast path. + + r.append (c, ce - c); + } + } + + // From. + // + r += ' '; + r.append (from_begin, from_size); + + // JOIN list, pass 1. + // + size_t join_pos (0); + if (joins_begin != 0) + { + // Fill in the JOIN "area" with spaces. + // + r.resize (r.size () + joins_end - joins_begin + 1, ' '); + join_pos = r.size () + 1; // End of the last JOIN. + } + + // Trailer. + // + if (trailer_size != 0) + { + r += ' '; + r.append (trailer_begin, trailer_size); + } + + // JOIN list, pass 2. + // + if (joins_begin != 0) + { + // Splice the JOINs into the pre-allocated area. + // + for (const char* je (joins_end), *j (newline_rbegin (je, joins_begin)); + j != 0; newline_rnext (j, je, joins_begin)) + { + size_t n (je - j); + + // Get the alias or, if none used, the table name. + // + p = find (j + 10, je, ' '); // 10 for "LEFT JOIN ". + const char* table_end (p); + p++; // Skip space. + + const char* alias_begin (0); + size_t alias_size (0); + if (je - p > 3 && traits::compare (p, "AS ", 3) == 0) + { + p += 3; + alias_begin = p; + alias_size = find (p, je, ' ') - alias_begin; + } + else + { + alias_begin = j + 10; + alias_size = table_end - alias_begin; + } + + // The alias must be quoted. + // + assert (*alias_begin == quote_open && + *(alias_begin + alias_size - 1) == quote_close); + + // We now need to see if the alias is used in either the SELECT + // list, the WHERE conditions, or the ON condition of any of the + // JOINs that we have already processed and decided to keep. + // + // Instead of re-parsing the whole thing again, we are going to + // take a shortcut and simply search for the alias in the statement + // we have constructed so far (that's why we have have added the + // trailer before filling in the JOINs). To make it more robust, + // we are going to do a few extra sanity checks, specifically, + // that the alias is a top level identifier and is followed by + // only a single identifer (column). This will catch cases like + // [s].[t].[c] where [s] is also used as an alias or LEFT JOIN [t] + // where [t] is also used as an alias in another JOIN. + // + bool found (false); + for (size_t p (r.find (alias_begin, 0, alias_size)); + p != string::npos; + p = r.find (alias_begin, p + alias_size, alias_size)) + { + size_t e (p + alias_size); + + // If we are not a top-level qualifier or not a bottom-level, + // then we are done (3 is for at least "[a]"). + // + if ((p != 0 && r[p - 1] == '.') || + (e + 3 >= r.size () || (r[e] != '.' || r[e + 1] != quote_open))) + continue; + + // The only way to distinguish the [a].[c] from FROM [a].[c] or + // LEFT JOIN [a].[c] is by checking the prefix. + // + if ((p > 5 && r.compare (p - 5, 5, "FROM ") == 0) || + (p > 10 && r.compare (p - 10, 10, "LEFT JOIN ") == 0)) + continue; + + // Check that we are followed by a single identifier. + // + e = r.find (quote_close, e + 2); + if (e == string::npos || (e + 1 != r.size () && r[e + 1] == '.')) + continue; + + found = true; + break; + } + + join_pos -= n + 1; // Extra one for space. + if (found) + r.replace (join_pos, n, j, n); + else + r.erase (join_pos - 1, n + 1); // Extra one for space. + } + } + } +} diff --git a/odb/statement.hxx b/odb/statement.hxx index 894b495..00ffa2e 100644 --- a/odb/statement.hxx +++ b/odb/statement.hxx @@ -7,6 +7,9 @@ #include +#include +#include // std::size_t + #include // connection #include @@ -34,6 +37,66 @@ namespace odb protected: statement () {} + + // Statement processing. Kept public only for testing. + // + public: + // Expected statement structure: + // + // INSERT INTO table\n + // [(a,\n + // b)\n] + // [OUTPUT ...\n] + // [VALUES\n + // ($1,\n + // $2)[\n]] + // [DEFAULT VALUES[\n]] + // [RETURNING ...] + // [; SELECT ...] + // + static void + process_insert (const char* statement, + const void* const* bind, // Array of bind buffer pointers. + std::size_t bind_size, // Number of bind elements. + std::size_t bind_skip, // Offset to the next bind. + char param_symbol, // $, ?, :, etc. + std::string& result); + + // Expected statement structure: + // + // UPDATE table\n + // [SET\n + // a=$1,\n + // b=$2[\n]] + // [OUTPUT ...] + // [WHERE ...] + // + static void + process_update (const char* statement, + const void* const* bind, // Array of bind buffer pointers. + std::size_t bind_size, // Number of bind elements. + std::size_t bind_skip, // Offset to the next bind. + char param_symbol, // $, ?, :, etc. + std::string& result); + + // Expected statement structure: + // + // SELECT\n + // [schema.]table.a,\n + // alias.b\n + // FROM [schema.]table\n + // [LEFT JOIN [schema.]table [AS alias] ON ...\n]* + // [WHERE ...] + // + static void + process_select (const char* statement, + const void* const* bind, // Array of bind buffer pointers. + std::size_t bind_size, // Number of bind elements. + std::size_t bind_skip, // Offset to the next bind. + char quote_open, // Identifier opening quote. + char quote_close, // Identifier closing quote. + bool optimize, // Remove unused JOINs. + std::string& result); }; } -- cgit v1.1