From 258b34e3afcaeea7864825819986f9754a6881f9 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Mon, 21 Aug 2023 19:25:06 -0400 Subject: [PATCH] feat: support extglob patterns --- grammar.js | 17 +++++--- src/scanner.c | 83 +++++++++++++++++++++++++++++++++++--- test/corpus/statements.txt | 20 ++++++++- 3 files changed, 106 insertions(+), 14 deletions(-) diff --git a/grammar.js b/grammar.js index de6c08e5d..9ce4dc9d2 100644 --- a/grammar.js +++ b/grammar.js @@ -55,6 +55,7 @@ module.exports = grammar({ $._concat, $.variable_name, // Variable name followed by an operator like '=' or '+=' $.regex, + $.extglob_pattern, '}', ']', '<<', @@ -264,10 +265,14 @@ module.exports = grammar({ ), case_item: $ => seq( - optional('('), - field('value', $._literal), - repeat(seq('|', field('value', $._literal))), - ')', + choice( + seq( + optional('('), + field('value', choice($._literal, $.extglob_pattern)), + repeat(seq('|', field('value', choice($._literal, $.extglob_pattern)))), + ')', + ), + ), optional($._statements), prec(1, choice( field('termination', ';;'), @@ -277,8 +282,8 @@ module.exports = grammar({ last_case_item: $ => seq( optional('('), - field('value', $._literal), - repeat(seq('|', field('value', $._literal))), + field('value', choice($._literal, $.extglob_pattern)), + repeat(seq('|', field('value', choice($._literal, $.extglob_pattern)))), ')', optional($._statements), optional(prec(1, ';;')), diff --git a/src/scanner.c b/src/scanner.c index 422aa6299..8a58bc400 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -47,6 +47,7 @@ enum TokenType { CONCAT, VARIABLE_NAME, REGEX, + EXTGLOB_PATTERN, CLOSING_BRACE, CLOSING_BRACKET, HEREDOC_ARROW, @@ -77,12 +78,9 @@ static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); } static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); } static inline bool in_error_recovery(const bool *valid_symbols) { - if (valid_symbols[HEREDOC_START] && valid_symbols[HEREDOC_BODY_END] && - valid_symbols[FILE_DESCRIPTOR] && valid_symbols[EMPTY_VALUE] && - valid_symbols[CONCAT] && valid_symbols[REGEX]) { - return true; - } - return false; + return valid_symbols[HEREDOC_START] && valid_symbols[HEREDOC_BODY_END] && + valid_symbols[FILE_DESCRIPTOR] && valid_symbols[EMPTY_VALUE] && + valid_symbols[CONCAT] && valid_symbols[REGEX]; } static unsigned serialize(Scanner *scanner, char *buffer) { @@ -458,6 +456,79 @@ static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) { } } + if (valid_symbols[EXTGLOB_PATTERN]) { + // first skip ws, then check for ? * + @ ! + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + + if (lexer->lookahead == '?' || lexer->lookahead == '*' || + lexer->lookahead == '+' || lexer->lookahead == '@' || + lexer->lookahead == '!') { + lexer->mark_end(lexer); + advance(lexer); + + if (lexer->lookahead != '(') { + return false; + } + + typedef struct { + bool done; + uint32_t paren_depth; + uint32_t bracket_depth; + uint32_t brace_depth; + } State; + + State state = {false, 0, 0, 0}; + while (!state.done) { + switch (lexer->lookahead) { + case '\0': + return false; + case '(': + state.paren_depth++; + break; + case '[': + state.bracket_depth++; + break; + case '{': + state.brace_depth++; + break; + case ')': + if (state.paren_depth == 0) { + state.done = true; + } + state.paren_depth--; + break; + case ']': + if (state.bracket_depth == 0) { + state.done = true; + } + state.bracket_depth--; + break; + case '}': + if (state.brace_depth == 0) { + state.done = true; + } + state.brace_depth--; + break; + } + + if (!state.done) { + bool was_space = iswspace(lexer->lookahead); + advance(lexer); + if (!was_space) { + lexer->mark_end(lexer); + } + } + } + + lexer->result_symbol = EXTGLOB_PATTERN; + return true; + } + + return false; + } + return false; } diff --git a/test/corpus/statements.txt b/test/corpus/statements.txt index c0e53aee6..29f494c51 100644 --- a/test/corpus/statements.txt +++ b/test/corpus/statements.txt @@ -622,7 +622,11 @@ esac case x in x) echo meow ;; esac case foo in - bar\ baz) : ;; + bar\ baz) : ;; +esac + +case "$arg" in + *([0-9])([0-9])) echo "$arg" esac -------------------------------------------------------------------------------- @@ -706,7 +710,19 @@ esac (word) (command (command_name - (word)))))) + (word))))) + (case_statement + (string + (simple_expansion + (variable_name))) + (case_item + (extglob_pattern) + (command + (command_name + (word)) + (string + (simple_expansion + (variable_name))))))) ================================================================================ Test commands