From a599248e9dfab9f5d57c06bed56f75941cb00047 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Fri, 10 Sep 2021 13:38:03 +0200 Subject: Add multi-argument grouping support in group_scanner --- cli-tests/group/driver.cxx | 41 +++++++++-- cli-tests/group/testscript | 84 ++++++++++++++++++++-- cli/cli/options.cli | 10 ++- cli/cli/runtime-header.cxx | 13 ++-- cli/cli/runtime-inline.cxx | 2 +- cli/cli/runtime-source.cxx | 171 ++++++++++++++++++++++++++++++++------------- 6 files changed, 254 insertions(+), 67 deletions(-) diff --git a/cli-tests/group/driver.cxx b/cli-tests/group/driver.cxx index 68f6107..01abf23 100644 --- a/cli-tests/group/driver.cxx +++ b/cli-tests/group/driver.cxx @@ -5,6 +5,8 @@ // Test group_scanner. // +#include +#include #include #include "test.hxx" @@ -25,27 +27,58 @@ main (int argc, char* argv[]) // string m (argv[1]); + bool sa (m.find ('s') != string::npos); + bool sg (m.find ('g') != string::npos); + argv_scanner as (--argc, ++argv); group_scanner s (as); + // Verify previous two args are still valid for good measure. + // + const char* prev_a (0); + string prev_s; + + // Verify position. + // + size_t pos (0); // argv_scanner starts from 1. 
+ while (s.more ()) { - if (m.find ('s') == string::npos) + assert (pos < s.position ()); + pos = s.position (); + + s.peek (); + assert (pos == s.position ()); + + const char* a; + if (!sa) { - const char* a (s.next ()); + a = s.next (); cout << "'" << a << "'"; } else s.skip (); - if (m.find ('g') == string::npos) + if (!sg) { scanner& gs (s.group ()); while (gs.more ()) cout << " '" << gs.next () << "'"; } - cout << endl; + if (!sa || !sg) + cout << endl; + + if (!sa && !sg) + { + s.more (); + + if (prev_a != 0) + assert (prev_a == prev_s); + + prev_a = a; + prev_s = a; + } } return 0; diff --git a/cli-tests/group/testscript b/cli-tests/group/testscript index 6269ca2..0b1d939 100644 --- a/cli-tests/group/testscript +++ b/cli-tests/group/testscript @@ -19,6 +19,14 @@ $* '' { --foo --bar }+ arg1 arg2 >>EOO 'arg2' EOO +: group-pre-pack +: +$* '' { --foo --bar }+ { arg1 arg2 } arg3 >>EOO +'arg1' '--foo' '--bar' +'arg2' '--foo' '--bar' +'arg3' +EOO + : group-pre-multi : $* '' { --foo }+ { --bar }+ arg1 arg2 >>EOO @@ -26,6 +34,14 @@ $* '' { --foo }+ { --bar }+ arg1 arg2 >>EOO 'arg2' EOO +: group-pre-multi-pack +: +$* '' { --foo }+ { --bar }+ { arg1 arg2 } arg3 >>EOO +'arg1' '--foo' '--bar' +'arg2' '--foo' '--bar' +'arg3' +EOO + : group-post : $* '' arg1 arg2 +{ foo bar } >>EOO @@ -33,6 +49,14 @@ $* '' arg1 arg2 +{ foo bar } >>EOO 'arg2' 'foo' 'bar' EOO +: group-post-pack +: +$* '' arg1 { arg2 arg3 } +{ foo bar } >>EOO +'arg1' +'arg2' 'foo' 'bar' +'arg3' 'foo' 'bar' +EOO + : group-post-multi : $* '' arg1 arg2 +{ foo } +{ bar } >>EOO @@ -40,6 +64,14 @@ $* '' arg1 arg2 +{ foo } +{ bar } >>EOO 'arg2' 'foo' 'bar' EOO +: group-post-multi-pack +: +$* '' arg1 { arg2 arg3 } +{ foo } +{ bar } >>EOO +'arg1' +'arg2' 'foo' 'bar' +'arg3' 'foo' 'bar' +EOO + : group-both : $* '' arg1 { --foo --bar }+ arg2 +{ foo bar } arg3 >>EOO @@ -48,6 +80,15 @@ $* '' arg1 { --foo --bar }+ arg2 +{ foo bar } arg3 >>EOO 'arg3' EOO +: group-both-pack +: +$* '' arg1 { --foo --bar }+ { arg2 arg3 } +{ 
foo bar } arg4 >>EOO +'arg1' +'arg2' '--foo' '--bar' 'foo' 'bar' +'arg3' '--foo' '--bar' 'foo' 'bar' +'arg4' +EOO + : group-both-multi : $* '' arg1 { --foo }+ { --bar }+ arg2 +{ foo } +{ bar } arg3 >>EOO @@ -56,6 +97,15 @@ $* '' arg1 { --foo }+ { --bar }+ arg2 +{ foo } +{ bar } arg3 >>EOO 'arg3' EOO +: group-both-multi-pack +: +$* '' arg1 { --foo }+ { --bar }+ { arg2 arg3 } +{ foo } +{ bar } arg4 >>EOO +'arg1' +'arg2' '--foo' '--bar' 'foo' 'bar' +'arg3' '--foo' '--bar' 'foo' 'bar' +'arg4' +EOO + : multi-group : $* '' { --foo }+ arg1 arg2 +{ bar } >>EOO @@ -63,6 +113,15 @@ $* '' { --foo }+ arg1 arg2 +{ bar } >>EOO 'arg2' 'bar' EOO +: multi-group-pack +: +$* '' { --foo }+ { arg1 arg2 } { arg3 arg4 } +{ bar } >>EOO +'arg1' '--foo' +'arg2' '--foo' +'arg3' 'bar' +'arg4' 'bar' +EOO + : empty-group : $* '' { }+ arg1 arg2 +{ } >>EOO @@ -70,6 +129,13 @@ $* '' { }+ arg1 arg2 +{ } >>EOO 'arg2' EOO +: empty-group-pack +: +$* '' { }+ { arg1 arg2 } +{ } >>EOO +'arg1' +'arg2' +EOO + : escape-arg : $* '' '\{' '\}' '\+{' '\}+' '{x' '}x' >>EOO @@ -87,10 +153,16 @@ $* '' { '\{' '\}' '\+{' '\}+' '{x' '}x' }+ arg >>EOO 'arg' '{' '}' '+{' '}+' '{x' '}x' EOO -: not-group +: pack-no-group : -$* '' { --foo } 2>>EOE != 0 -expected group separator '}+' instead of '}', use '\}' to escape +$* '' { --foo } { arg2 }+ 2>>EOE != 0 +unexpected group separator '{', use '\{' to escape +EOE + +: empty-pack +: +$* '' { --foo }+ { } 2>>EOE != 0 +unexpected group separator '{', use '\{' to escape EOE : no-arg-pre @@ -135,6 +207,8 @@ EOE : unhandled-group-skip : -$* 'sg' { --foo }+ arg +{ bar } >>EOO +$* 'sg' { --foo }+ arg +{ bar } -EOO +: unhandled-group-skip-pack +: +$* 'sg' { --foo }+ { arg1 arg2 } +{ bar } diff --git a/cli/cli/options.cli b/cli/cli/options.cli index ff462d3..9273845 100644 --- a/cli/cli/options.cli +++ b/cli/cli/options.cli @@ -108,11 +108,15 @@ class options { -f }+ { -b }+ arg +{ f=1 } +{ b=2 } # 'arg' with '-f' 'b' 'f=1' 'b=2' \ - Note that the group applies to a single argument 
only. For example: + The group applies to a single argument only unless multiple arguments + are themselves grouped with '\cb{{}' and '\cb{\}}'. For example: \ - { --foo }+ arg1 arg2 +{ --bar } # 'arg1' with '--foo' and - # 'arg2' with '--bar' + { --foo }+ arg1 arg2 +{ --bar } # 'arg1' with '--foo' + # 'arg2' with '--bar' + + { --foo }+ { arg1 arg2 } +{ --bar } # 'arg1' with '--foo' '--bar' + # 'arg2' with '--foo' '--bar' \ The group separators ('\cb{{}', '\cb{\}+'}, etc) must be separate command diff --git a/cli/cli/runtime-header.cxx b/cli/cli/runtime-header.cxx index 5bbe5c6..2148941 100644 --- a/cli/cli/runtime-header.cxx +++ b/cli/cli/runtime-header.cxx @@ -605,21 +605,20 @@ generate_runtime_header (context& ctx) << "static separator" << endl << "sense (const char*);" << endl - << "// If the state is scanned or skipped, then scan the" << endl - << "// leading groups and save the next (unescaped) argument in" << endl - << "// arg_. If the state is peeked, then scan the trailing" << endl - << "// groups. In both cases set the new state." << endl + + << "// Scan the leading groups, the next argument/argument pack,"<< endl + << "// and the trailing groups." << endl << "//" << endl << "void" << endl - << "scan_group (state);" + << "scan_group ();" << endl << "scanner& scan_;" << "state state_;" << endl << "// Circular buffer of two arguments." 
<< endl << "//" << endl - << "std::string arg_[2];" - << "std::size_t i_;" + << "std::vector<std::string> arg_[2];" + << "std::size_t i_, j_, pos_;" << endl << "std::vector<std::string> group_;" << "vector_scanner group_scan_;" diff --git a/cli/cli/runtime-inline.cxx b/cli/cli/runtime-inline.cxx index e3dce1b..ce18e92 100644 --- a/cli/cli/runtime-inline.cxx +++ b/cli/cli/runtime-inline.cxx @@ -421,7 +421,7 @@ generate_runtime_inline (context& ctx) << inl << "group_scanner::" << endl << "group_scanner (scanner& s)" << endl - << ": scan_ (s), state_ (skipped), i_ (1), group_scan_ (group_)" + << ": scan_ (s), state_ (skipped), i_ (1), j_ (0), group_scan_ (group_)" << "{" << "}" diff --git a/cli/cli/runtime-source.cxx b/cli/cli/runtime-source.cxx index 81eab4a..d5334a0 100644 --- a/cli/cli/runtime-source.cxx +++ b/cli/cli/runtime-source.cxx @@ -732,49 +732,50 @@ generate_runtime_source (context& ctx, bool complete) << "if (state_ == scanned)" << "{" << "if (group_scan_.end () != group_.size ())" << endl - << "throw unexpected_group (arg_[i_], group_scan_.next ());" + << "throw unexpected_group (arg_[i_][j_], group_scan_.next ());" << "}" - << "return scan_.more ();" + << "return j_ != 0 || scan_.more ();" << "}" << "const char* group_scanner::" << endl << "peek ()" << "{" - << "if (state_ != peeked)" << endl - << "scan_group (peeked);" - << "scan_.peek ();" + << "if (state_ != peeked)" + << "{" + << "scan_group ();" + << "state_ = peeked;" + << "}" << "// Return unescaped." 
<< endl - << "return arg_[i_].c_str ();" + << "return arg_[i_][--j_].c_str ();" << "}" << "void group_scanner::" << endl << "skip ()" << "{" << "if (state_ != peeked)" << endl - << "scan_group (peeked);" - << "scan_.skip ();" - << "scan_group (skipped);" + << "scan_group ();" + << "state_ = skipped;" + << "--j_;" << "}" << "std::size_t group_scanner::" << endl << "position ()" << "{" - << "return scan_.position ();" + << "return j_ == 0 ? scan_.position () : pos_ + (arg_[i_].size () - j_);" << "}" << "void group_scanner::" << endl - << "scan_group (state st)" + << "scan_group ()" << "{" << "// If the previous argument has been scanned, then make" << endl << "// sure the group has been scanned (handled) as well." << endl @@ -782,58 +783,66 @@ generate_runtime_source (context& ctx, bool complete) << "if (state_ == scanned)" << "{" << "if (group_scan_.end () != group_.size ())" << endl - << "throw unexpected_group (arg_[i_], group_scan_.next ());" + << "throw unexpected_group (arg_[i_][j_], group_scan_.next ());" << "}" + << "// If we still have arguments in the pack, rewind the group." << endl + << "//" << endl + << "if (j_ != 0)" + << "{" + << "group_scan_.reset ();" + << "return;" + << "}" + + // Position must remain the same from before the first call to peek() + // (comes directly from the scanner) and until next(). + // // Note that while it may seem like a good idea to pass // scan_.position() to reset() below, the trailing group positions // will overlap with the argument's. So it seems best to start // positions of each argument in a group from 0. // - << "if (state_ != peeked)" - << "{" - << "arg_[i_ == 0 ? ++i_ : --i_].clear ();" - << "group_.clear ();" - << "group_scan_.reset ();" - << "}" - - << "// We recognize all group sequences both before and " << endl - << "// after the argument and diagnose any misuse. 
We may" << endl - << "// also have multiple groups:" << endl - << "//" << endl - << "// { -x }+ { -y }+ arg" << endl - << "//" << endl + // Note also that we try hard not to throw away allocated memory in + // arg_[][0]. + // + << "i_ += (i_ == 0 ? 1 : -1);" + << "group_.clear ();" + << "group_scan_.reset ();" + << "pos_ = scan_.position ();" << endl - << "// Using group_ won't cover empty groups." << endl + + << "// Note: using group_ won't cover empty groups and using" << endl + << "// j_ won't cover single-argument packs." << endl << "//" << endl - << "bool g (false);" + << "bool group (false), pack (false);" << endl - << "while (scan_.more ())" + << "do" << "{" - << "const char* a (scan_.peek ());" + << "const char* a (scan_.next ());" << "size_t i (*a == '\\\\' ? 1 : 0);" << "separator s (sense (a + i));" << endl << "if (s == none || i != 0)" << "{" - << "if (state_ != peeked)" << endl - << "arg_[i_] = a + (s != none ? i : 0);" + << "if (arg_[i_].size () != 1)" << endl + << "arg_[i_].resize (1);" + << endl + << "arg_[i_][0] = a + (s != none ? i : 0);" + << "j_ = 1;" << "break;" << "}" - << "// Start of a leading group for the next argument." << endl + << "// Start of a leading group for the next argument or" << endl + << "// argument pack. We will only know which once we see" << endl + << "// the closing separator." << endl << "//" << endl - << "if (s == open && state_ == peeked)" << endl - << "break;" - << endl - << "if (s != (state_ == peeked ? open_plus : open))" << endl + << "if (s != open)" << endl << "throw group_separator (a, \"\");" << endl - << "g = true;" + << "size_t n (group_.size ());" << endl << "// Scan the group until the closing separator." << endl << "//" << endl - << "scan_.next ();" << "s = none;" << "while (s == none && scan_.more ())" << "{" @@ -848,20 +857,88 @@ generate_runtime_source (context& ctx, bool complete) << "}" << "}" - << "if (s != (state_ == peeked ? 
close : close_plus))" + << "if (s == close)" << "{" - << "throw group_separator ((s != none ? a : \"\")," << endl - << "(state_ == peeked ? \"}\" : \"}+\"));" + << "size_t m (group_.size ());" + << endl + << "j_ = m - n;" + << "if (j_ == 0)" << endl + << "throw group_separator (\"{\", \"\");" + << endl + << "if (arg_[i_].size () != j_)" << endl + << "arg_[i_].resize (j_);" + << endl + << "// Move from group_ to arg_. Add in reverse for ease " << endl + << "// of iteration." << endl + << "//" << endl + << "for (size_t j (0); j != j_; ++j)" << endl + << "arg_[i_][j] = group_[m - j - 1];" + << "group_.resize (n);" + << endl + << "pack = true;" + << "break;" << "}" + << "else if (s == close_plus)" << endl + << "group = true;" + << "else" << endl + << "throw group_separator ((s != none ? a : \"\"), \"}+\");" << "}" + << "while (scan_.more ());" + << endl << "// Handle the case where we have seen the leading group" << endl << "// but there are no more arguments." << endl << "//" << endl - << "if (g && state_ != peeked && !scan_.more ())" << endl + << "if (group && j_ == 0)" << endl << "throw group_separator (\"{\", \"\");" << endl - << "state_ = st;" + << "// Handle trailing groups, if any." << endl + << "//" << endl + + << "while (scan_.more ())" + << "{" + << "const char* a (scan_.peek ());" + << "size_t i (*a == '\\\\' ? 1 : 0);" + << "separator s (sense (a + i));" + << endl + + << "// Next argument, argument pack, or leading group." << endl + << "//" << endl + << "if (s == none || s == open || i != 0)" << endl + << "break;" + << endl + << "if (s != open_plus)" << endl + << "throw group_separator (a, \"\");" + << endl + << "group = true;" + << endl + << "// Scan the group until the closing separator." << endl + << "//" << endl + << "scan_.next ();" + << "s = none;" + << "while (s == none && scan_.more ())" + << "{" + << "a = scan_.next ();" + << "i = (*a == '\\\\' ? 
1 : 0);" + << "s = sense (a + i);" + << endl + << "if (s == none || i != 0)" + << "{" + << "group_.push_back (a + (s != none ? i : 0));" + << "s = none;" + << "}" + << "}" + + << "if (s != close)" << endl + << "throw group_separator ((s != none ? a : \"\"), \"}\");" + << "}" + + << "// Handle the case where we have seen the argument pack" << endl + << "// without leading or trailing group." << endl + << "//" << endl + << "if (pack && !group)" << endl + << "throw group_separator (\"{\", \"\");" + << "}"; } -- cgit v1.1