From cea6fb57ac8c9a893c0f404fef6c1469f0b6222b Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Thu, 22 Jul 2010 14:33:21 +0200 Subject: Next chunk of functionality Add SQL language lexer. Implement MySQL type declaration parser. Create sub-directories for databases, currently mysql and tracer. Create MySQL-specific context. --- odb/sql-lexer.cxx | 240 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 240 insertions(+) create mode 100644 odb/sql-lexer.cxx (limited to 'odb/sql-lexer.cxx') diff --git a/odb/sql-lexer.cxx b/odb/sql-lexer.cxx new file mode 100644 index 0000000..e5b1693 --- /dev/null +++ b/odb/sql-lexer.cxx @@ -0,0 +1,240 @@ +// file : odb/sql-lexer.cxx +// author : Boris Kolpackov +// copyright : Copyright (c) 2009-2010 Code Synthesis Tools CC +// license : GNU GPL v2; see accompanying LICENSE file + +#include + +#include + +using namespace std; + +sql_lexer:: +sql_lexer (std::string const& sql) + : loc_ ("C"), + is_ (sql), + l_ (1), + c_(1), + eos_ (false), + buf_ (0, 0, 0), + unget_ (false) +{ +} + +sql_lexer::xchar sql_lexer:: +peek () +{ + if (unget_) + return buf_; + else + { + if (eos_) + return xchar (xchar::traits_type::eof (), l_, c_); + else + { + xchar::int_type i (is_.peek ()); + + if (i == xchar::traits_type::eof ()) + eos_ = true; + + return xchar (i, l_, c_); + } + } +} + +sql_lexer::xchar sql_lexer:: +get () +{ + if (unget_) + { + unget_ = false; + return buf_; + } + else + { + // When is_.get () returns eof, the failbit is also set (stupid, + // isn't?) which may trigger an exception. To work around this + // we will call peek() first and only call get() if it is not + // eof. But we can only call peek() on eof once; any subsequent + // calls will spoil the failbit (even more stupid). + // + xchar c (peek ()); + + if (!is_eos (c)) + { + is_.get (); + + if (c == '\n') + { + l_++; + c_ = 1; + } + else + c_++; + } + + return c; + } +} + +void sql_lexer:: +unget (xchar c) +{ + // Because iostream::unget cannot work once eos is reached, + // we have to provide our own implementation. + // + buf_ = c; + unget_ = true; +} + +sql_token sql_lexer:: +next () +{ + skip_spaces (); + + xchar c (get ()); + + if (is_eos (c)) + return sql_token (); + + switch (c) + { + case '\'': + { + return string_literal (c); + } + case '\"': + { + return string_literal (c); + } + case '`': + { + return string_literal (c); + } + case ';': + { + return sql_token (sql_token::p_semi); + } + case ',': + { + return sql_token (sql_token::p_comma); + } + case '(': + { + return sql_token (sql_token::p_lparen); + } + case ')': + { + return sql_token (sql_token::p_rparen); + } + case '=': + { + return sql_token (sql_token::p_eq); + } + case '-': + { + return int_literal (get (), true); + } + case '+': + { + return int_literal (get (), false); + } + } + + if (is_alpha (c) || c == '_') + { + return identifier (c); + } + + if (is_dec_digit (c)) + { + return int_literal (c); + } + + ostringstream msg; + msg << "unexpected character '" << c << "'"; + throw invalid_input (c.line (), c.column (), msg.str ()); +} + +void sql_lexer:: +skip_spaces () +{ + for (xchar c (peek ());; c = peek ()) + { + if (is_eos (c) || !is_space (c)) + break; + + get (); + } +} + +sql_token sql_lexer:: +identifier (xchar c) +{ + size_t ln (c.line ()), cl (c.column ()); + string lexeme; + lexeme += c; + + for (c = peek (); + !is_eos (c) && (is_alnum (c) || c == '_'); + c = peek ()) + { + get (); + lexeme += c; + } + + return sql_token (sql_token::t_identifier, lexeme); +} + +sql_token sql_lexer:: +int_literal (xchar c, bool neg, size_t ml, size_t mc) +{ + //size_t ln (neg ? ml : c.line ()), cl (neg ? mc : c.column ()); + string lexeme; + + if (neg) + lexeme += '-'; + + lexeme += c; + + for (c = peek (); !is_eos (c) && is_dec_digit (c); c = peek ()) + { + get (); + lexeme += c; + } + + return sql_token (sql_token::t_int_lit, lexeme); +} + +sql_token sql_lexer:: +string_literal (xchar c) +{ + //size_t ln (c.line ()), cl (c.column ()); + char q (c), p ('\0'); + string lexeme; + lexeme += c; + + while (true) + { + xchar c = get (); + + if (is_eos (c)) + throw invalid_input ( + c.line (), c.column (), "unterminated quoted string"); + + lexeme += c; + + if (c == q && p != '\\') + break; + + // We need to keep track of \\ escapings so we don't confuse + // them with \", as in "\\". + // + if (c == '\\' && p == '\\') + p = '\0'; + else + p = c; + } + + return sql_token (sql_token::t_string_lit, lexeme); +} -- cgit v1.1