From 29c8ce737f513766673fd3e57e30233d9fcce159 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Sun, 13 Sep 2009 14:02:04 +0200 Subject: Add C++ source code indenter --- cutl/compiler/cxx-indenter.cxx | 50 +++ cutl/compiler/cxx-indenter.hxx | 169 ++++++++++ cutl/compiler/cxx-indenter.ixx | 70 ++++ cutl/compiler/cxx-indenter.txx | 710 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 999 insertions(+) create mode 100644 cutl/compiler/cxx-indenter.cxx create mode 100644 cutl/compiler/cxx-indenter.hxx create mode 100644 cutl/compiler/cxx-indenter.ixx create mode 100644 cutl/compiler/cxx-indenter.txx (limited to 'cutl/compiler') diff --git a/cutl/compiler/cxx-indenter.cxx b/cutl/compiler/cxx-indenter.cxx new file mode 100644 index 0000000..2173942 --- /dev/null +++ b/cutl/compiler/cxx-indenter.cxx @@ -0,0 +1,50 @@ +// file : cutl/compiler/cxx-indenter.cxx +// author : Boris Kolpackov +// copyright : Copyright (c) 2009 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#include + +namespace cutl +{ + namespace compiler + { + template<> + char const* cxx_indenter:: + keyword (cxx_indenter::keyword_type t) + { + static char const* keywords[] = + { + "if", + "do", + "for", + "else", + "case", + "while", + "catch", + "default" + }; + + return keywords[t]; + } + + template<> + wchar_t const* cxx_indenter:: + keyword (cxx_indenter::keyword_type t) + { + static wchar_t const* keywords[] = + { + L"if", + L"do", + L"for", + L"else", + L"case", + L"while", + L"catch", + L"default" + }; + + return keywords[t]; + } + } +} diff --git a/cutl/compiler/cxx-indenter.hxx b/cutl/compiler/cxx-indenter.hxx new file mode 100644 index 0000000..0e5e2a1 --- /dev/null +++ b/cutl/compiler/cxx-indenter.hxx @@ -0,0 +1,169 @@ +// file : cutl/compiler/cxx-indenter.hxx +// author : Boris Kolpackov +// copyright : Copyright (c) 2009 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#ifndef CUTL_COMPILER_CXX_INDENTER_HXX +#define CUTL_COMPILER_CXX_INDENTER_HXX + +#include +#include +#include +#include // std::size_t + +#include + +namespace cutl +{ + namespace compiler + { + template + class cxx_indenter: public code_stream + { + public: + cxx_indenter (code_stream& out); + + private: + cxx_indenter (cxx_indenter const&); + + cxx_indenter& + operator= (cxx_indenter const&); + + public: + virtual void + put (C); + + virtual void + unbuffer (); + + private: + typedef std::basic_string string; + + enum construct + { + con_other, + con_pp_dir, + con_c_com, + con_cxx_com, + con_string_lit, + con_char_lit + }; + + private: + void + next_token (string const& old, C); + + void + ensure_new_line (); + + void + output_indentation (); + + void + write (C); + + private: + void + tokenize (C, construct old); + + void + retire (C); + + private: + enum char_class_type + { + cc_alpha, // Alpha + '_'. + cc_digit, + cc_op_punc, // Operator or punctuation. + cc_space + }; + + static char_class_type + char_class (C); + + private: + enum keyword_type + { + kw_if, + kw_do, + kw_for, + kw_else, + kw_case, + kw_while, + kw_catch, + kw_default + }; + + static C const* + keyword (keyword_type); + + private: + code_stream& out_; + bool buffering_; // True if write() should buffer the char. + std::size_t position_; // Current position on the line. + std::size_t paren_balance_; // ( ) balance. + std::stack indentation_; + std::size_t spaces_; + bool suppress_nl_; + construct construct_; + + // Special state stach for the do-while construct. The presence + // of an element in the stack indicates that we are in a braced + // do-while construct. The value of the element is the brace + // balance. + std::stack do_while_state_; + + typedef std::deque hold; + hold hold_; + + private: + string token_; // previously fully recognized token + string lexeme_; // current lexeme (accumulator) + + // Keywords that may be folowed by a single-line block, e.g., if, + // else, etc. + // + std::set single_line_blocks_; + + // Keywords that may follow (and be related) to a previous block, + // e.g., else, case, catch. + // + std::set follow_blocks_; + + string do_; + string lbrace_; + string rbrace_; + + private: + // Single-line indented blocks such as if, else, while, etc. The + // newline flag indicates whether a new line has been seen after + // the keyword. This is needed to properly distinguish cases such + // as: + // + // else if (...) + // foo (); + // + // else + // if (...) + // foo (); + // + struct indent_block + { + indent_block (bool newline, bool indented) + : newline_ (newline), indented_ (indented) + { + } + + bool newline_; + bool indented_; + }; + + std::stack indent_stack_; + }; + } +} + +#include +#include + +#endif // CUTL_COMPILER_CXX_INDENTER_HXX diff --git a/cutl/compiler/cxx-indenter.ixx b/cutl/compiler/cxx-indenter.ixx new file mode 100644 index 0000000..fde4652 --- /dev/null +++ b/cutl/compiler/cxx-indenter.ixx @@ -0,0 +1,70 @@ +// file : cutl/compiler/cxx-indenter.ixx +// author : Boris Kolpackov +// copyright : Copyright (c) 2009 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +namespace cutl +{ + namespace compiler + { + template + inline typename cxx_indenter::char_class_type cxx_indenter:: + char_class (C c) + { + switch (c) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return cc_digit; + + case '!': + case '%': + case '^': + case '&': + case '*': + case '(': + case ')': + case '-': + case '+': + case '=': + case '{': + case '}': + case '|': + case '~': + case '[': + case ']': + case '\\': + case ';': + case '\'': + case ':': + case '"': + case '<': + case '>': + case '?': + case ',': + case '.': + case '/': + return cc_op_punc; + + case ' ': + case '\n': + case '\t': + case '\f': + case '\r': + case '\v': + return cc_space; + + default: + return cc_alpha; + } + } + } +} diff --git a/cutl/compiler/cxx-indenter.txx b/cutl/compiler/cxx-indenter.txx new file mode 100644 index 0000000..73e3ceb --- /dev/null +++ b/cutl/compiler/cxx-indenter.txx @@ -0,0 +1,710 @@ +// file : cutl/compiler/cxx-indenter.txx +// author : Boris Kolpackov +// copyright : Copyright (c) 2009 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +namespace cutl +{ + namespace compiler + { + template + cxx_indenter:: + cxx_indenter (code_stream& out) + : out_ (out), + buffering_ (false), + position_ (0), + paren_balance_ (0), + spaces_ (2), + construct_ (con_other), + do_ (keyword(kw_do)), + lbrace_ (1, '{'), + rbrace_ (1, '}') + { + indentation_.push (0); + + single_line_blocks_.insert (keyword(kw_if)); + single_line_blocks_.insert (keyword(kw_do)); + single_line_blocks_.insert (keyword(kw_for)); + single_line_blocks_.insert (keyword(kw_else)); + single_line_blocks_.insert (keyword(kw_case)); + single_line_blocks_.insert (keyword(kw_while)); + + follow_blocks_.insert (keyword(kw_else)); + follow_blocks_.insert (keyword(kw_case)); + follow_blocks_.insert (keyword(kw_catch)); + follow_blocks_.insert (keyword(kw_default)); + } + + template + void cxx_indenter:: + put (C c) + { + // First determine what kind of construct we are in. + // + construct new_con (construct_); + construct old_con (construct_); + + switch (c) + { + case '\n': + { + if (construct_ == con_pp_dir || + construct_ == con_cxx_com) + construct_ = new_con = con_other; + + break; + } + case '#': + { + if (construct_ == con_other) + construct_ = new_con = con_pp_dir; + + break; + } + case '\"': + { + if (construct_ != con_pp_dir && + construct_ != con_c_com && + construct_ != con_cxx_com && + construct_ != con_char_lit && + (hold_.empty () || hold_.back () != '\\')) + { + // Not an escape sequence. + // + if (construct_ == con_string_lit) + new_con = con_other; + else + construct_ = new_con = con_string_lit; + } + + break; + } + case '\'': + { + if (construct_ != con_pp_dir && + construct_ != con_c_com && + construct_ != con_cxx_com && + construct_ != con_string_lit && + (hold_.empty () || hold_.back () != '\\')) + { + // Not an escape sequence. + // + if (construct_ == con_char_lit) + new_con = con_other; + else + construct_ = new_con = con_char_lit; + } + + break; + } + case '/': + { + if (construct_ == con_other) + { + if (!hold_.empty () && hold_.back () == '/') + construct_ = new_con = con_cxx_com; + } + + if (construct_ == con_c_com) + { + if (!hold_.empty () && hold_.back () == '*') + construct_ = new_con = con_other; + } + + break; + } + case '*': + { + if (construct_ == con_other) + { + if (!hold_.empty () && hold_.back () == '/') + construct_ = new_con = con_c_com; + } + + break; + } + default: + { + break; + } + } + + // Special handling of CPP directives. + // + if (construct_ == con_pp_dir) + { + write (c); + position_++; + return; + } + + // + // + tokenize (c, old_con); + + + // Indentation in parenthesis. We don't need to make sure + // we are not in a comments, etc., because we make sure we + // don't hold anything in those states. + // + if (!hold_.empty () && hold_.back () == '(') + { + unbuffer (); // We don't need to hold it anymore. + + if (c == '\n') + indentation_.push (indentation_.top () + spaces_); + else + indentation_.push (position_); + } + + + // + // + bool defaulting (false); + + switch (c) + { + case '\n': + { + if (!indent_stack_.empty () && construct_ == con_other) + indent_stack_.top ().newline_ = true; + + hold_.push_back (c); + position_ = 0; // Starting a new line. + + break; + } + case '{': + { + if (construct_ == con_other) + { + if (!indent_stack_.empty ()) + { + // Pop all the blocks until the one that was indented. + // + while (!indent_stack_.top ().indented_) + indent_stack_.pop (); + + if (indentation_.size () > 1) + indentation_.pop (); + + indent_stack_.pop (); + } + + ensure_new_line (); + output_indentation (); + write (c); + ensure_new_line (); + + indentation_.push (indentation_.top () + spaces_); + } + else + defaulting = true; + + break; + } + case '}': + { + if (construct_ == con_other) + { + if (indentation_.size () > 1) + indentation_.pop (); + + // Reduce multiple newlines to one. + // + while (hold_.size () > 1) + { + typename hold::reverse_iterator i (hold_.rbegin ()); + + if (*i == '\n' && *(i + 1) == '\n') + hold_.pop_back (); + else + break; + } + + ensure_new_line (); + output_indentation (); + + hold_.push_back (c); + + + // Add double newline after '}'. + // + hold_.push_back ('\n'); + hold_.push_back ('\n'); + position_ = 0; + + buffering_ = true; + } + else + defaulting = true; + + break; + } + case ';': + { + if (construct_ == con_other) + { + // for (;;) + // + if (!indent_stack_.empty () && paren_balance_ == 0) + { + // Pop all the blocks until the one that was indented. + // + while (!indent_stack_.top ().indented_) + indent_stack_.pop (); + + if (indentation_.size () > 1) + indentation_.pop (); + + indent_stack_.pop (); + } + + if (paren_balance_ != 0) + { + // We are inside for (;;) statement. Nothing to do here. + // + defaulting = true; + } + else + { + // Handling '};' case. + // + + bool brace (false); + + if (hold_.size () > 1 && hold_.back () == '\n') + { + bool pop_nl (false); + + for (typename hold::reverse_iterator + i (hold_.rbegin ()), e (hold_.rend ()); + i != e; ++i) + { + if (*i != '\n') + { + if (*i == '}') + brace = pop_nl = true; + + break; + } + } + + if (pop_nl) + while (hold_.back () == '\n') + hold_.pop_back (); + } + + output_indentation (); + write (c); + position_++; + + if (brace) + { + hold_.push_back ('\n'); + hold_.push_back ('\n'); + } + + ensure_new_line (); + } + } + else + defaulting = true; + + break; + } + case ' ': + { + if (construct_ == con_other) + { + // Handling '} foo_;' case. + // + if (hold_.size () > 1 && hold_.back () == '\n') + { + bool pop_nl (false); + + for (typename hold::reverse_iterator + i (hold_.rbegin ()), e (hold_.rend ()); + i != e; ++i) + { + if (*i != '\n') + { + if (*i == '}') + pop_nl = true; + + break; + } + } + + if (pop_nl) + while (hold_.back () == '\n') + hold_.pop_back (); + } + } + + defaulting = true; + break; + } + case '\\': + { + if (construct_ != con_pp_dir && + construct_ != con_c_com && + construct_ != con_cxx_com) + { + output_indentation (); + hold_.push_back (c); + position_++; + } + else + defaulting = true; + + break; + + } + case '(': + { + if (construct_ == con_other) + { + // Hold it so that we can see what's coming next. + // + output_indentation (); + hold_.push_back (c); + position_++; + paren_balance_++; + } + else + defaulting = true; + break; + } + case ')': + { + if (construct_ == con_other) + { + if (indentation_.size () > 1) + indentation_.pop (); + + if (paren_balance_ > 0) + paren_balance_--; + } + + defaulting = true; + break; + } + case '/': + { + if (construct_ == con_other) + { + output_indentation (); + hold_.push_back (c); + position_++; + } + else + defaulting = true; + + break; + } + case '*': + { + if (construct_ == con_c_com) + { + output_indentation (); + hold_.push_back (c); + position_++; + } + else + defaulting = true; + + break; + } + default: + { + defaulting = true; + break; + } + } + + + if (defaulting) + { + output_indentation (); + write (c); + position_++; + } + + construct_ = new_con; + } + + template + void cxx_indenter:: + unbuffer () + { + for (; !hold_.empty (); hold_.pop_front ()) + out_.put (hold_.front ()); + } + + template + void cxx_indenter:: + next_token (string const& old, C c) + { + // Handle one line indentation blocks (if, else, etc). + // + if (single_line_blocks_.find (token_) != single_line_blocks_.end ()) + { + // Only indent sub-blocks if we are on a new line. + // + bool indent (indent_stack_.empty () || + indent_stack_.top ().newline_); + + indent_stack_.push (indent_block (c == '\n', indent)); + + if (indent) + indentation_.push (indentation_.top () + spaces_); + } + + // Keep track of the do ... while construct in order to suppress + // the newline after } and before while. + // + if (old == do_ && token_ == lbrace_) + do_while_state_.push (0); + + if (!do_while_state_.empty ()) + { + if (token_ == lbrace_) + do_while_state_.top ()++; + + if (token_ == rbrace_) + do_while_state_.top ()--; + } + + // Suppress double newline in the "}else", etc., cases. + // + if (old == rbrace_) + { + bool dw (!do_while_state_.empty () && do_while_state_.top () == 0); + + if (follow_blocks_.find (token_) != follow_blocks_.end () || dw) + { + if (dw) + do_while_state_.pop (); + + // Reduce double newline after "}" into a single one. + // + typename hold::iterator i (hold_.end ()), b (hold_.begin ()); + + for (--i; i != b; --i) + { + // See if this is the end of the "}\n\n" sequence. + // + if (*i == '\n') + { + --i; + if (i != b && *i == '\n') + { + --i; + if (*i == '}') + { + ++i; + hold_.erase (i); + break; + } + } + } + } + } + else if (token_ != rbrace_) + { + buffering_ = false; + } + } + } + + template + void cxx_indenter:: + ensure_new_line () + { + if (hold_.empty () || hold_.back () != '\n') + { + hold_.push_back ('\n'); + position_ = 0; // Starting a new line. + } + } + + + template + void cxx_indenter:: + output_indentation () + { + if (!hold_.empty () && hold_.back () == '\n') + { + for (std::size_t i (0); i < indentation_.top (); ++i) + write (' '); + + position_ += indentation_.top (); + } + } + + template + void cxx_indenter:: + write (C c) + { + hold_.push_back (c); + + if (!buffering_) + { + for (; !hold_.empty (); hold_.pop_front ()) + out_.put (hold_.front ()); + } + } + + template + void cxx_indenter:: + tokenize (C c, construct old) + { + // + // + switch (construct_) + { + case con_pp_dir: + { + if (old == con_other) // Start PP directive + retire (c); + + return; + } + case con_c_com: + { + if (old == con_other) // Start C comment. + lexeme_.clear (); + + return; + } + case con_cxx_com: + { + if (old == con_other) // Start C++ comment. + lexeme_.clear (); + + return; + } + case con_string_lit: + { + if (old == con_other) // Start string literal + retire (c); + + lexeme_ += c; + return; + } + case con_char_lit: + { + if (old == con_other) // Start char literal + retire (c); + + lexeme_ += c; + return; + } + default: + break; + } + + // construct_ == other + // + switch (old) + { + case con_pp_dir: + { + // End PP directive (newline). + // + return; + } + case con_c_com: + { + // End C comment. + // + return; + } + case con_cxx_com: + { + // End C++ comment (newline). + // + return; + } + case con_string_lit: + { + // End string literal ("). + // + lexeme_ += c; + return; + } + case con_char_lit: + { + // End char literal ('). + // + lexeme_ += c; + return; + } + default: + break; + } + + + // construct_ == old == other + // + + switch (char_class (c)) + { + case cc_alpha: + { + if (lexeme_.empty () || + char_class (lexeme_[0]) == cc_alpha) + lexeme_ += c; + else + { + retire (c); + lexeme_ += c; + } + break; + } + case cc_digit: + { + if (lexeme_.empty ()) + lexeme_ += c; + else + { + char_class_type cc (char_class (lexeme_[0])); + + if (cc == cc_alpha || cc == cc_digit) + lexeme_ += c; + else + { + retire (c); + lexeme_ += c; + } + } + break; + } + case cc_op_punc: + { + retire (c); + lexeme_ += c; + break; + } + case cc_space: + { + retire (c); + break; + } + } + } + + template + void cxx_indenter:: + retire (C c) + { + if (!lexeme_.empty ()) + { + token_.swap (lexeme_); + next_token (lexeme_, c); + lexeme_.clear (); + } + } + } +} -- cgit v1.1