diff options
author | Boris Kolpackov <boris@codesynthesis.com> | 2010-07-22 14:33:21 +0200 |
---|---|---|
committer | Boris Kolpackov <boris@codesynthesis.com> | 2010-07-22 14:33:21 +0200 |
commit | cea6fb57ac8c9a893c0f404fef6c1469f0b6222b (patch) | |
tree | fed8b6ffa8ea2cb6347ece69c0cb81003d0ccbf6 /odb/sql-lexer.cxx | |
parent | 5f71c55a1c24c23af1eeb0d664922497a0e5c071 (diff) |
Next chunk of functionality
Add SQL language lexer. Implement MySQL type declaration parser.
Create sub-directories for databases, currently mysql and tracer.
Create MySQL-specific context.
Diffstat (limited to 'odb/sql-lexer.cxx')
-rw-r--r-- | odb/sql-lexer.cxx | 240 |
1 files changed, 240 insertions, 0 deletions
diff --git a/odb/sql-lexer.cxx b/odb/sql-lexer.cxx new file mode 100644 index 0000000..e5b1693 --- /dev/null +++ b/odb/sql-lexer.cxx @@ -0,0 +1,240 @@ +// file : odb/sql-lexer.cxx +// author : Boris Kolpackov <boris@codesynthesis.com> +// copyright : Copyright (c) 2009-2010 Code Synthesis Tools CC +// license : GNU GPL v2; see accompanying LICENSE file + +#include <iostream> + +#include <odb/sql-lexer.hxx> + +using namespace std; + +sql_lexer:: +sql_lexer (std::string const& sql) + : loc_ ("C"), + is_ (sql), + l_ (1), + c_(1), + eos_ (false), + buf_ (0, 0, 0), + unget_ (false) +{ +} + +sql_lexer::xchar sql_lexer:: +peek () +{ + if (unget_) + return buf_; + else + { + if (eos_) + return xchar (xchar::traits_type::eof (), l_, c_); + else + { + xchar::int_type i (is_.peek ()); + + if (i == xchar::traits_type::eof ()) + eos_ = true; + + return xchar (i, l_, c_); + } + } +} + +sql_lexer::xchar sql_lexer:: +get () +{ + if (unget_) + { + unget_ = false; + return buf_; + } + else + { + // When is_.get () returns eof, the failbit is also set (stupid, + // isn't?) which may trigger an exception. To work around this + // we will call peek() first and only call get() if it is not + // eof. But we can only call peek() on eof once; any subsequent + // calls will spoil the failbit (even more stupid). + // + xchar c (peek ()); + + if (!is_eos (c)) + { + is_.get (); + + if (c == '\n') + { + l_++; + c_ = 1; + } + else + c_++; + } + + return c; + } +} + +void sql_lexer:: +unget (xchar c) +{ + // Because iostream::unget cannot work once eos is reached, + // we have to provide our own implementation. + // + buf_ = c; + unget_ = true; +} + +sql_token sql_lexer:: +next () +{ + skip_spaces (); + + xchar c (get ()); + + if (is_eos (c)) + return sql_token (); + + switch (c) + { + case '\'': + { + return string_literal (c); + } + case '\"': + { + return string_literal (c); + } + case '`': + { + return string_literal (c); + } + case ';': + { + return sql_token (sql_token::p_semi); + } + case ',': + { + return sql_token (sql_token::p_comma); + } + case '(': + { + return sql_token (sql_token::p_lparen); + } + case ')': + { + return sql_token (sql_token::p_rparen); + } + case '=': + { + return sql_token (sql_token::p_eq); + } + case '-': + { + return int_literal (get (), true); + } + case '+': + { + return int_literal (get (), false); + } + } + + if (is_alpha (c) || c == '_') + { + return identifier (c); + } + + if (is_dec_digit (c)) + { + return int_literal (c); + } + + ostringstream msg; + msg << "unexpected character '" << c << "'"; + throw invalid_input (c.line (), c.column (), msg.str ()); +} + +void sql_lexer:: +skip_spaces () +{ + for (xchar c (peek ());; c = peek ()) + { + if (is_eos (c) || !is_space (c)) + break; + + get (); + } +} + +sql_token sql_lexer:: +identifier (xchar c) +{ + size_t ln (c.line ()), cl (c.column ()); + string lexeme; + lexeme += c; + + for (c = peek (); + !is_eos (c) && (is_alnum (c) || c == '_'); + c = peek ()) + { + get (); + lexeme += c; + } + + return sql_token (sql_token::t_identifier, lexeme); +} + +sql_token sql_lexer:: +int_literal (xchar c, bool neg, size_t ml, size_t mc) +{ + //size_t ln (neg ? ml : c.line ()), cl (neg ? mc : c.column ()); + string lexeme; + + if (neg) + lexeme += '-'; + + lexeme += c; + + for (c = peek (); !is_eos (c) && is_dec_digit (c); c = peek ()) + { + get (); + lexeme += c; + } + + return sql_token (sql_token::t_int_lit, lexeme); +} + +sql_token sql_lexer:: +string_literal (xchar c) +{ + //size_t ln (c.line ()), cl (c.column ()); + char q (c), p ('\0'); + string lexeme; + lexeme += c; + + while (true) + { + xchar c = get (); + + if (is_eos (c)) + throw invalid_input ( + c.line (), c.column (), "unterminated quoted string"); + + lexeme += c; + + if (c == q && p != '\\') + break; + + // We need to keep track of \\ escapings so we don't confuse + // them with \", as in "\\". + // + if (c == '\\' && p == '\\') + p = '\0'; + else + p = c; + } + + return sql_token (sql_token::t_string_lit, lexeme); +} |