aboutsummaryrefslogtreecommitdiff
path: root/odb/sql-lexer.cxx
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2010-07-22 14:33:21 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2010-07-22 14:33:21 +0200
commit cea6fb57ac8c9a893c0f404fef6c1469f0b6222b (patch)
tree fed8b6ffa8ea2cb6347ece69c0cb81003d0ccbf6 /odb/sql-lexer.cxx
parent 5f71c55a1c24c23af1eeb0d664922497a0e5c071 (diff)
Next chunk of functionality
Add SQL language lexer. Implement MySQL type declaration parser. Create sub-directories for databases, currently mysql and tracer. Create MySQL-specific context.
Diffstat (limited to 'odb/sql-lexer.cxx')
-rw-r--r-- odb/sql-lexer.cxx 240
1 files changed, 240 insertions, 0 deletions
diff --git a/odb/sql-lexer.cxx b/odb/sql-lexer.cxx
new file mode 100644
index 0000000..e5b1693
--- /dev/null
+++ b/odb/sql-lexer.cxx
@@ -0,0 +1,240 @@
+// file : odb/sql-lexer.cxx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2009-2010 Code Synthesis Tools CC
+// license : GNU GPL v2; see accompanying LICENSE file
+
+#include <iostream>
+
+#include <odb/sql-lexer.hxx>
+
+using namespace std;
+
+// Create a lexer over the SQL text in sql. The line and column
+// counters both start at 1 and no character is pushed back initially.
+//
+sql_lexer::
+sql_lexer (std::string const& sql)
+    : loc_ ("C"),      // Named "C"; presumably the classic C locale.
+      is_ (sql),       // Input stream that peek()/get() read from.
+      l_ (1),          // Line of the next character to be read.
+      c_ (1),          // Column of the next character to be read.
+      eos_ (false),    // Set by peek() once eof has been observed.
+      buf_ (0, 0, 0),  // Placeholder; only read while unget_ is true.
+      unget_ (false)   // Nothing has been pushed back yet.
+{
+}
+
+// Return the next character without consuming it. A character that
+// was pushed back with unget() takes precedence over the stream. The
+// returned xchar carries the current line and column.
+//
+sql_lexer::xchar sql_lexer::
+peek ()
+{
+  if (unget_)
+    return buf_;
+
+  xchar::int_type const eof (xchar::traits_type::eof ());
+
+  // Once eof has been seen we keep reporting it without touching
+  // the underlying stream again.
+  //
+  if (eos_)
+    return xchar (eof, l_, c_);
+
+  xchar::int_type const v (is_.peek ());
+
+  if (v == eof)
+    eos_ = true;
+
+  return xchar (v, l_, c_);
+}
+
+// Consume and return the next character, updating the line and column
+// counters. A character pushed back with unget() is returned first
+// (without touching the counters). At end of stream the eof character
+// is returned and nothing is consumed.
+//
+sql_lexer::xchar sql_lexer::
+get ()
+{
+  if (unget_)
+  {
+    unget_ = false;
+    return buf_;
+  }
+
+  // When is_.get () returns eof, the failbit is also set, which may
+  // trigger an exception. To work around this we call peek() first
+  // and only call get() if the result is not eof. Note that peek() on
+  // the stream can itself only be called once at eof (any subsequent
+  // call would also affect the failbit), which is why our peek()
+  // remembers that eof has been reached.
+  //
+  xchar c (peek ());
+
+  if (is_eos (c))
+    return c;
+
+  is_.get ();
+
+  if (c == '\n')
+  {
+    // Start of a new line.
+    //
+    ++l_;
+    c_ = 1;
+  }
+  else
+    ++c_;
+
+  return c;
+}
+
+// Push the character c back so that the next peek() or get() returns
+// it. Only a single character is remembered; a second unget() before
+// the first one is consumed overwrites it.
+//
+void sql_lexer::
+unget (xchar c)
+{
+  // iostream::unget cannot be used once the end of the stream has
+  // been reached, so we keep our own one-character buffer instead.
+  //
+  unget_ = true;
+  buf_ = c;
+}
+
+// Extract and return the next token from the input.
+//
+// Leading white space is skipped first. At end of input a
+// default-constructed sql_token is returned (presumably the
+// end-of-stream token; see sql_token). Otherwise the first character
+// selects the kind of token:
+//
+//   ' " `              quoted string (see string_literal())
+//   ; , ( ) =          the corresponding punctuation token
+//   - +                signed integer literal; the sign must be
+//                      immediately followed by a decimal digit
+//   letter or _        identifier (see identifier())
+//   decimal digit      integer literal (see int_literal())
+//
+// Throws invalid_input if the character cannot start a token or if a
+// sign is not followed by a decimal digit.
+//
+sql_token sql_lexer::
+next ()
+{
+  skip_spaces ();
+
+  xchar c (get ());
+
+  if (is_eos (c))
+    return sql_token ();
+
+  switch (c)
+  {
+  case '\'':
+  case '\"':
+  case '`':
+    {
+      // All three quote characters are scanned the same way; the
+      // opening quote determines the closing one.
+      //
+      return string_literal (c);
+    }
+  case ';':
+    {
+      return sql_token (sql_token::p_semi);
+    }
+  case ',':
+    {
+      return sql_token (sql_token::p_comma);
+    }
+  case '(':
+    {
+      return sql_token (sql_token::p_lparen);
+    }
+  case ')':
+    {
+      return sql_token (sql_token::p_rparen);
+    }
+  case '=':
+    {
+      return sql_token (sql_token::p_eq);
+    }
+  case '-':
+  case '+':
+    {
+      bool const neg (c == '-');
+
+      // int_literal() appends its first character without checking
+      // it, so make sure that the sign is really followed by a digit.
+      // Otherwise input such as "-x" or a trailing "-" would be
+      // returned as a bogus integer literal.
+      //
+      xchar d (get ());
+
+      if (is_eos (d))
+        throw invalid_input (
+          d.line (), d.column (), "unexpected end of input after sign");
+
+      if (!is_dec_digit (d))
+      {
+        ostringstream msg;
+        msg << "unexpected character '" << d << "'";
+        throw invalid_input (d.line (), d.column (), msg.str ());
+      }
+
+      return int_literal (d, neg);
+    }
+  }
+
+  if (is_alpha (c) || c == '_')
+  {
+    return identifier (c);
+  }
+
+  if (is_dec_digit (c))
+  {
+    return int_literal (c);
+  }
+
+  ostringstream msg;
+  msg << "unexpected character '" << c << "'";
+  throw invalid_input (c.line (), c.column (), msg.str ());
+}
+
+// Consume characters up to, but not including, the first character
+// that is not white space (as determined by is_space()) or the end of
+// the stream, whichever comes first.
+//
+void sql_lexer::
+skip_spaces ()
+{
+  xchar c (peek ());
+
+  while (!is_eos (c) && is_space (c))
+  {
+    get ();
+    c = peek ();
+  }
+}
+
+// Scan an identifier whose first character, c, has already been
+// consumed by the caller. The identifier continues for as long as the
+// following characters are alphanumeric or '_'. Returns a
+// t_identifier token whose lexeme is the identifier text.
+//
+sql_token sql_lexer::
+identifier (xchar c)
+{
+  string lexeme;
+  lexeme += c;
+
+  // Peek first and only consume the character once we know it belongs
+  // to the identifier, so that the terminating character is left in
+  // the stream for the next token.
+  //
+  for (c = peek ();
+       !is_eos (c) && (is_alnum (c) || c == '_');
+       c = peek ())
+  {
+    get ();
+    lexeme += c;
+  }
+
+  return sql_token (sql_token::t_identifier, lexeme);
+}
+
+// Scan an integer literal whose first character, c, has already been
+// consumed by the caller. Note that c is appended to the lexeme
+// without being checked. If neg is true, the lexeme is prefixed with
+// '-'. Returns a t_int_lit token.
+//
+// NOTE: ml and mc are currently unused; the resulting token does not
+// record a position.
+//
+sql_token sql_lexer::
+int_literal (xchar c, bool neg, size_t ml, size_t mc)
+{
+  string lexeme (neg ? "-" : "");
+  lexeme += c;
+
+  // Collect the remaining decimal digits, leaving the first non-digit
+  // character in the stream.
+  //
+  xchar d (peek ());
+
+  while (!is_eos (d) && is_dec_digit (d))
+  {
+    get ();
+    lexeme += d;
+    d = peek ();
+  }
+
+  return sql_token (sql_token::t_int_lit, lexeme);
+}
+
+// Scan a quoted string. The opening quote, c, has already been
+// consumed by the caller and also serves as the closing quote. A
+// quote character preceded by an unescaped backslash does not end the
+// string. Returns a t_string_lit token whose lexeme includes both the
+// opening and the closing quotes.
+//
+// Throws invalid_input if the end of the stream is reached before the
+// closing quote.
+//
+sql_token sql_lexer::
+string_literal (xchar c)
+{
+  char quote (c);
+  char prev ('\0'); // Previous character; '\0' if it must be ignored.
+
+  string lexeme;
+  lexeme += c;
+
+  for (;;)
+  {
+    xchar x (get ());
+
+    if (is_eos (x))
+      throw invalid_input (
+        x.line (), x.column (), "unterminated quoted string");
+
+    lexeme += x;
+
+    if (x == quote && prev != '\\')
+      break;
+
+    // A backslash that is itself escaped by a backslash must not
+    // escape the character that follows it, as in "\\". So forget
+    // the previous character once such a pair is complete.
+    //
+    bool const escaped_backslash (x == '\\' && prev == '\\');
+
+    if (escaped_backslash)
+      prev = '\0';
+    else
+      prev = x;
+  }
+
+  return sql_token (sql_token::t_string_lit, lexeme);
+}