From a599248e9dfab9f5d57c06bed56f75941cb00047 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Fri, 10 Sep 2021 13:38:03 +0200 Subject: Add multi-argument grouping support in group_scanner --- cli/cli/options.cli | 10 ++- cli/cli/runtime-header.cxx | 13 ++-- cli/cli/runtime-inline.cxx | 2 +- cli/cli/runtime-source.cxx | 171 ++++++++++++++++++++++++++++++++------------- 4 files changed, 138 insertions(+), 58 deletions(-) (limited to 'cli') diff --git a/cli/cli/options.cli b/cli/cli/options.cli index ff462d3..9273845 100644 --- a/cli/cli/options.cli +++ b/cli/cli/options.cli @@ -108,11 +108,15 @@ class options { -f }+ { -b }+ arg +{ f=1 } +{ b=2 } # 'arg' with '-f' 'b' 'f=1' 'b=2' \ - Note that the group applies to a single argument only. For example: + The group applies to a single argument only unless multiple arguments + are themselves grouped with '\cb{{}' and '\cb{\}}'. For example: \ - { --foo }+ arg1 arg2 +{ --bar } # 'arg1' with '--foo' and - # 'arg2' with '--bar' + { --foo }+ arg1 arg2 +{ --bar } # 'arg1' with '--foo' + # 'arg2' with '--bar' + + { --foo }+ { arg1 arg2 } +{ --bar } # 'arg1' with '--foo' '--bar' + # 'arg2' with '--foo' '--bar' \ The group separators ('\cb{{}', '\cb{\}+'}, etc) must be separate command diff --git a/cli/cli/runtime-header.cxx b/cli/cli/runtime-header.cxx index 5bbe5c6..2148941 100644 --- a/cli/cli/runtime-header.cxx +++ b/cli/cli/runtime-header.cxx @@ -605,21 +605,20 @@ generate_runtime_header (context& ctx) << "static separator" << endl << "sense (const char*);" << endl - << "// If the state is scanned or skipped, then scan the" << endl - << "// leading groups and save the next (unescaped) argument in" << endl - << "// arg_. If the state is peeked, then scan the trailing" << endl - << "// groups. In both cases set the new state." << endl + + << "// Scan the leading groups, the next argument/argument pack,"<< endl + << "// and the trailing groups." 
<< endl << "//" << endl << "void" << endl - << "scan_group (state);" + << "scan_group ();" << endl << "scanner& scan_;" << "state state_;" << endl << "// Circular buffer of two arguments." << endl << "//" << endl - << "std::string arg_[2];" - << "std::size_t i_;" + << "std::vector<std::string> arg_[2];" + << "std::size_t i_, j_, pos_;" << endl << "std::vector<std::string> group_;" << "vector_scanner group_scan_;" diff --git a/cli/cli/runtime-inline.cxx b/cli/cli/runtime-inline.cxx index e3dce1b..ce18e92 100644 --- a/cli/cli/runtime-inline.cxx +++ b/cli/cli/runtime-inline.cxx @@ -421,7 +421,7 @@ generate_runtime_inline (context& ctx) << inl << "group_scanner::" << endl << "group_scanner (scanner& s)" << endl - << ": scan_ (s), state_ (skipped), i_ (1), group_scan_ (group_)" + << ": scan_ (s), state_ (skipped), i_ (1), j_ (0), group_scan_ (group_)" << "{" << "}" diff --git a/cli/cli/runtime-source.cxx b/cli/cli/runtime-source.cxx index 81eab4a..d5334a0 100644 --- a/cli/cli/runtime-source.cxx +++ b/cli/cli/runtime-source.cxx @@ -732,49 +732,50 @@ generate_runtime_source (context& ctx, bool complete) << "if (state_ == scanned)" << "{" << "if (group_scan_.end () != group_.size ())" << endl - << "throw unexpected_group (arg_[i_], group_scan_.next ());" + << "throw unexpected_group (arg_[i_][j_], group_scan_.next ());" << "}" - << "return scan_.more ();" + << "return j_ != 0 || scan_.more ();" << "}" << "const char* group_scanner::" << endl << "peek ()" << "{" - << "if (state_ != peeked)" << endl - << "scan_group (peeked);" - << "scan_.peek ();" + << "if (state_ != peeked)" + << "{" + << "scan_group ();" + << "state_ = peeked;" + << "}" << "// Return unescaped."
<< endl - << "return arg_[i_].c_str ();" + << "return arg_[i_][--j_].c_str ();" << "}" << "void group_scanner::" << endl << "skip ()" << "{" << "if (state_ != peeked)" << endl - << "scan_group (peeked);" - << "scan_.skip ();" - << "scan_group (skipped);" + << "scan_group ();" + << "state_ = skipped;" + << "--j_;" << "}" << "std::size_t group_scanner::" << endl << "position ()" << "{" - << "return scan_.position ();" + << "return j_ == 0 ? scan_.position () : pos_ + (arg_[i_].size () - j_);" << "}" << "void group_scanner::" << endl - << "scan_group (state st)" + << "scan_group ()" << "{" << "// If the previous argument has been scanned, then make" << endl << "// sure the group has been scanned (handled) as well." << endl @@ -782,58 +783,66 @@ generate_runtime_source (context& ctx, bool complete) << "if (state_ == scanned)" << "{" << "if (group_scan_.end () != group_.size ())" << endl - << "throw unexpected_group (arg_[i_], group_scan_.next ());" + << "throw unexpected_group (arg_[i_][j_], group_scan_.next ());" << "}" + << "// If we still have arguments in the pack, rewind the group." << endl + << "//" << endl + << "if (j_ != 0)" + << "{" + << "group_scan_.reset ();" + << "return;" + << "}" + + // Position must remain the same from before the first call to peek() + // (comes directly from the scanner) and until next(). + // // Note that while it may seem like a good idea to pass // scan_.position() to reset() below, the trailing group positions // will overlap with the argument's. So it seems best to start // positions of each argument in a group from 0. // - << "if (state_ != peeked)" - << "{" - << "arg_[i_ == 0 ? ++i_ : --i_].clear ();" - << "group_.clear ();" - << "group_scan_.reset ();" - << "}" - - << "// We recognize all group sequences both before and " << endl - << "// after the argument and diagnose any misuse. 
We may" << endl - << "// also have multiple groups:" << endl - << "//" << endl - << "// { -x }+ { -y }+ arg" << endl - << "//" << endl + // Note also that we try hard not to throw away allocated memory in + // arg_[][0]. + // + << "i_ += (i_ == 0 ? 1 : -1);" + << "group_.clear ();" + << "group_scan_.reset ();" + << "pos_ = scan_.position ();" << endl - << "// Using group_ won't cover empty groups." << endl + + << "// Note: using group_ won't cover empty groups and using" << endl + << "// j_ won't cover single-argument packs." << endl << "//" << endl - << "bool g (false);" + << "bool group (false), pack (false);" << endl - << "while (scan_.more ())" + << "do" << "{" - << "const char* a (scan_.peek ());" + << "const char* a (scan_.next ());" << "size_t i (*a == '\\\\' ? 1 : 0);" << "separator s (sense (a + i));" << endl << "if (s == none || i != 0)" << "{" - << "if (state_ != peeked)" << endl - << "arg_[i_] = a + (s != none ? i : 0);" + << "if (arg_[i_].size () != 1)" << endl + << "arg_[i_].resize (1);" + << endl + << "arg_[i_][0] = a + (s != none ? i : 0);" + << "j_ = 1;" << "break;" << "}" - << "// Start of a leading group for the next argument." << endl + << "// Start of a leading group for the next argument or" << endl + << "// argument pack. We will only know which once we see" << endl + << "// the closing separator." << endl << "//" << endl - << "if (s == open && state_ == peeked)" << endl - << "break;" - << endl - << "if (s != (state_ == peeked ? open_plus : open))" << endl + << "if (s != open)" << endl << "throw group_separator (a, \"\");" << endl - << "g = true;" + << "size_t n (group_.size ());" << endl << "// Scan the group until the closing separator." << endl << "//" << endl - << "scan_.next ();" << "s = none;" << "while (s == none && scan_.more ())" << "{" @@ -848,20 +857,88 @@ generate_runtime_source (context& ctx, bool complete) << "}" << "}" - << "if (s != (state_ == peeked ? 
close : close_plus))" + << "if (s == close)" << "{" - << "throw group_separator ((s != none ? a : \"\")," << endl - << "(state_ == peeked ? \"}\" : \"}+\"));" + << "size_t m (group_.size ());" + << endl + << "j_ = m - n;" + << "if (j_ == 0)" << endl + << "throw group_separator (\"{\", \"\");" + << endl + << "if (arg_[i_].size () != j_)" << endl + << "arg_[i_].resize (j_);" + << endl + << "// Move from group_ to arg_. Add in reverse for ease " << endl + << "// of iteration." << endl + << "//" << endl + << "for (size_t j (0); j != j_; ++j)" << endl + << "arg_[i_][j] = group_[m - j - 1];" + << "group_.resize (n);" + << endl + << "pack = true;" + << "break;" << "}" + << "else if (s == close_plus)" << endl + << "group = true;" + << "else" << endl + << "throw group_separator ((s != none ? a : \"\"), \"}+\");" << "}" + << "while (scan_.more ());" + << endl << "// Handle the case where we have seen the leading group" << endl << "// but there are no more arguments." << endl << "//" << endl - << "if (g && state_ != peeked && !scan_.more ())" << endl + << "if (group && j_ == 0)" << endl << "throw group_separator (\"{\", \"\");" << endl - << "state_ = st;" + << "// Handle trailing groups, if any." << endl + << "//" << endl + + << "while (scan_.more ())" + << "{" + << "const char* a (scan_.peek ());" + << "size_t i (*a == '\\\\' ? 1 : 0);" + << "separator s (sense (a + i));" + << endl + + << "// Next argument, argument pack, or leading group." << endl + << "//" << endl + << "if (s == none || s == open || i != 0)" << endl + << "break;" + << endl + << "if (s != open_plus)" << endl + << "throw group_separator (a, \"\");" + << endl + << "group = true;" + << endl + << "// Scan the group until the closing separator." << endl + << "//" << endl + << "scan_.next ();" + << "s = none;" + << "while (s == none && scan_.more ())" + << "{" + << "a = scan_.next ();" + << "i = (*a == '\\\\' ? 
1 : 0);" + << "s = sense (a + i);" + << endl + << "if (s == none || i != 0)" + << "{" + << "group_.push_back (a + (s != none ? i : 0));" + << "s = none;" + << "}" + << "}" + + << "if (s != close)" << endl + << "throw group_separator ((s != none ? a : \"\"), \"}\");" + << "}" + + << "// Handle the case where we have seen the argument pack" << endl + << "// without leading or trailing group." << endl + << "//" << endl + << "if (pack && !group)" << endl + << "throw group_separator (\"{\", \"\");" + << "}"; } -- cgit v1.1