From cea6fb57ac8c9a893c0f404fef6c1469f0b6222b Mon Sep 17 00:00:00 2001
From: Boris Kolpackov <boris@codesynthesis.com>
Date: Thu, 22 Jul 2010 14:33:21 +0200
Subject: Next chunk of functionality

Add SQL language lexer. Implement MySQL type declaration parser.
Create sub-directories for databases, currently mysql and tracer.
Create MySQL-specific context.
---
 odb/sql-lexer.cxx | 240 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 240 insertions(+)
 create mode 100644 odb/sql-lexer.cxx

(limited to 'odb/sql-lexer.cxx')
diff --git a/odb/sql-lexer.cxx b/odb/sql-lexer.cxx
new file mode 100644
index 0000000..e5b1693
--- /dev/null
+++ b/odb/sql-lexer.cxx
@@ -0,0 +1,240 @@
+// file      : odb/sql-lexer.cxx
+// author    : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2009-2010 Code Synthesis Tools CC
+// license   : GNU GPL v2; see accompanying LICENSE file
+
+#include <iostream>
+
+#include <odb/sql-lexer.hxx>
+
+using namespace std;
+
+sql_lexer::
+sql_lexer (std::string const& sql)
+    : loc_ ("C"),
+      is_ (sql),
+      l_ (1),
+      c_(1),
+      eos_ (false),
+      buf_ (0, 0, 0),
+      unget_ (false)
+{
+}
+
+sql_lexer::xchar sql_lexer::
+peek ()
+{
+  if (unget_)
+    return buf_;
+  else
+  {
+    if (eos_)
+      return xchar (xchar::traits_type::eof (), l_, c_);
+    else
+    {
+      xchar::int_type i (is_.peek ());
+
+      if (i == xchar::traits_type::eof ())
+        eos_ = true;
+
+      return xchar (i, l_, c_);
+    }
+  }
+}
+
+sql_lexer::xchar sql_lexer::
+get ()
+{
+  if (unget_)
+  {
+    unget_ = false;
+    return buf_;
+  }
+  else
+  {
+    // When is_.get () returns eof, the failbit is also set (stupid,
+    // isn't?) which may trigger an exception. To work around this
+    // we will call peek() first and only call get() if it is not
+    // eof. But we can only call peek() on eof once; any subsequent
+    // calls will spoil the failbit (even more stupid).
+    //
+    xchar c (peek ());
+
+    if (!is_eos (c))
+    {
+      is_.get ();
+
+      if (c == '\n')
+      {
+        l_++;
+        c_ = 1;
+      }
+      else
+        c_++;
+    }
+
+    return c;
+  }
+}
+
+void sql_lexer::
+unget (xchar c)
+{
+  // Because iostream::unget cannot work once eos is reached,
+  // we have to provide our own implementation.
+  //
+  buf_ = c;
+  unget_ = true;
+}
+
+sql_token sql_lexer::
+next ()
+{
+  skip_spaces ();
+
+  xchar c (get ());
+
+  if (is_eos (c))
+    return sql_token ();
+
+  switch (c)
+  {
+  case '\'':
+    {
+      return string_literal (c);
+    }
+  case '\"':
+    {
+      return string_literal (c);
+    }
+  case '`':
+    {
+      return string_literal (c);
+    }
+  case ';':
+    {
+      return sql_token (sql_token::p_semi);
+    }
+  case ',':
+    {
+      return sql_token (sql_token::p_comma);
+    }
+  case '(':
+    {
+      return sql_token (sql_token::p_lparen);
+    }
+  case ')':
+    {
+      return sql_token (sql_token::p_rparen);
+    }
+  case '=':
+    {
+      return sql_token (sql_token::p_eq);
+    }
+  case '-':
+    {
+      return int_literal (get (), true);
+    }
+  case '+':
+    {
+      return int_literal (get (), false);
+    }
+  }
+
+  if (is_alpha (c) || c == '_')
+  {
+    return identifier (c);
+  }
+
+  if (is_dec_digit (c))
+  {
+    return int_literal (c);
+  }
+
+  ostringstream msg;
+  msg << "unexpected character '" << c << "'";
+  throw invalid_input (c.line (), c.column (), msg.str ());
+}
+
+void sql_lexer::
+skip_spaces ()
+{
+  for (xchar c (peek ());; c = peek ())
+  {
+    if (is_eos (c) || !is_space (c))
+      break;
+
+    get ();
+  }
+}
+
+sql_token sql_lexer::
+identifier (xchar c)
+{
+  size_t ln (c.line ()), cl (c.column ());
+  string lexeme;
+  lexeme += c;
+
+  for (c = peek ();
+       !is_eos (c) && (is_alnum (c) || c == '_');
+       c = peek ())
+  {
+    get ();
+    lexeme += c;
+  }
+
+  return sql_token (sql_token::t_identifier, lexeme);
+}
+
+sql_token sql_lexer::
+int_literal (xchar c, bool neg, size_t ml, size_t mc)
+{
+  //size_t ln (neg ? ml : c.line ()), cl (neg ? mc : c.column ());
+  string lexeme;
+
+  if (neg)
+    lexeme += '-';
+
+  lexeme += c;
+
+  for (c = peek (); !is_eos (c) && is_dec_digit (c); c = peek ())
+  {
+    get ();
+    lexeme += c;
+  }
+
+  return sql_token (sql_token::t_int_lit, lexeme);
+}
+
+sql_token sql_lexer::
+string_literal (xchar c)
+{
+  //size_t ln (c.line ()), cl (c.column ());
+  char q (c), p ('\0');
+  string lexeme;
+  lexeme += c;
+
+  while (true)
+  {
+    xchar c = get ();
+
+    if (is_eos (c))
+      throw invalid_input (
+        c.line (), c.column (), "unterminated quoted string");
+
+    lexeme += c;
+
+    if (c == q && p != '\\')
+      break;
+
+    // We need to keep track of \\ escapings so we don't confuse
+    // them with \", as in "\\".
+    //
+    if (c == '\\' && p == '\\')
+      p = '\0';
+    else
+      p = c;
+  }
+
+  return sql_token (sql_token::t_string_lit, lexeme);
+}
-- 
cgit v1.1