<?php declare(strict_types=1); namespace PhpMyAdmin\SqlParser\Statements; use PhpMyAdmin\SqlParser\Components\Array2d; use PhpMyAdmin\SqlParser\Components\OptionsArray; use PhpMyAdmin\SqlParser\Components\WithKeyword; use PhpMyAdmin\SqlParser\Exceptions\ParserException; use PhpMyAdmin\SqlParser\Parser; use PhpMyAdmin\SqlParser\Statement; use PhpMyAdmin\SqlParser\Token; use PhpMyAdmin\SqlParser\TokensList; use PhpMyAdmin\SqlParser\Translator; use function array_slice; use function count; /** * `WITH` statement. * WITH [RECURSIVE] query_name [ (column_name [,...]) ] AS (SELECT ...) [, ...] */ final class WithStatement extends Statement { /** * Options for `WITH` statements and their slot ID. * * @var array<string, int|array<int, int|string>> * @psalm-var array<string, (positive-int|array{positive-int, ('var'|'var='|'expr'|'expr=')})> */ public static $OPTIONS = ['RECURSIVE' => 1]; /** * The clauses of this statement, in order. * * @see Statement::$CLAUSES * * @var array<string, array<int, int|string>> * @psalm-var array<string, array{non-empty-string, (1|2|3)}> */ public static $CLAUSES = [ 'WITH' => [ 'WITH', 2, ], // Used for options. '_OPTIONS' => [ '_OPTIONS', 1, ], 'AS' => [ 'AS', 2, ], ]; /** @var WithKeyword[] */ public $withers = []; /** * holds the CTE parser. * * @var Parser|null */ public $cteStatementParser; /** * @param Parser $parser the instance that requests parsing * @param TokensList $list the list of tokens to be parsed * * @return void */ public function parse(Parser $parser, TokensList $list) { /** * The state of the parser. * * Below are the states of the parser. * * 0 ---------------- [ name ] -----------------> 1 * * 1 ------------------ [ ( ] ------------------> 2 * * 2 ------------------ [ AS ] -----------------> 3 * * 3 ------------------ [ ( ] ------------------> 4 * * 4 ------------------ [ , ] ------------------> 1 * * 4 ----- [ SELECT/UPDATE/DELETE/INSERT ] -----> 5 * * @var int */ $state = 0; $wither = null; ++$list->idx; // Skipping `WITH`. // parse any options if provided $this->options = OptionsArray::parse($parser, $list, static::$OPTIONS); ++$list->idx; for (; $list->idx < $list->count; ++$list->idx) { /** * Token parsed at this moment. */ $token = $list->tokens[$list->idx]; // Skipping whitespaces and comments. if ($token->type === Token::TYPE_WHITESPACE || $token->type === Token::TYPE_COMMENT) { continue; } if ($state === 0) { if ($token->type !== Token::TYPE_NONE) { $parser->error('The name of the CTE was expected.', $token); break; } $wither = $token->value; $this->withers[$wither] = new WithKeyword($wither); $state = 1; } elseif ($state === 1) { if ($token->type === Token::TYPE_OPERATOR && $token->value === '(') { $this->withers[$wither]->columns = Array2d::parse($parser, $list); $state = 2; } elseif ($token->type === Token::TYPE_KEYWORD && $token->keyword === 'AS') { $state = 3; } else { $parser->error('Unexpected token.', $token); break; } } elseif ($state === 2) { if (! ($token->type === Token::TYPE_KEYWORD && $token->keyword === 'AS')) { $parser->error('AS keyword was expected.', $token); break; } $state = 3; } elseif ($state === 3) { $idxBeforeGetNext = $list->idx; $list->idx++; // Ignore the current token $nextKeyword = $list->getNext(); if (! ($token->value === '(' && ($nextKeyword && $nextKeyword->value === 'SELECT'))) { $parser->error('Subquery of the CTE was expected.', $token); $list->idx = $idxBeforeGetNext; break; } // Restore the index $list->idx = $idxBeforeGetNext; ++$list->idx; $subList = $this->getSubTokenList($list); if ($subList instanceof ParserException) { $parser->errors[] = $subList; break; } $subParser = new Parser($subList); if (count($subParser->errors)) { foreach ($subParser->errors as $error) { $parser->errors[] = $error; } break; } $this->withers[$wither]->statement = $subParser; $state = 4; } elseif ($state === 4) { if ($token->value === ',') { // There's another WITH expression to parse, go back to state=0 $state = 0; continue; } if ( $token->type === Token::TYPE_KEYWORD && ( $token->value === 'SELECT' || $token->value === 'INSERT' || $token->value === 'UPDATE' || $token->value === 'DELETE' ) ) { $state = 5; --$list->idx; continue; } $parser->error('An expression was expected.', $token); break; } elseif ($state === 5) { /** * We need to parse all of the remaining tokens becuase mostly, they are only the CTE expression * which's mostly is SELECT, or INSERT, UPDATE, or delete statement. * e.g: INSERT .. ( SELECT 1 ) SELECT col1 FROM cte ON DUPLICATE KEY UPDATE col_name = 3. * The issue is that, `ON DUPLICATE KEY UPDATE col_name = 3` is related to the main INSERT query * not the cte expression (SELECT col1 FROM cte) we need to determine the end of the expression * to parse `ON DUPLICATE KEY UPDATE` from the InsertStatement parser instead. */ // Index of the last parsed token by default would be the last token in the $list, because we're // assuming that all remaining tokens at state 4, are related to the expression. $idxOfLastParsedToken = $list->count - 1; // Index before search to be able to restore the index. $idxBeforeSearch = $list->idx; // Length of expression tokens is null by default, in order for the $subList to start // from $list->idx to the end of the $list. $lengthOfExpressionTokens = null; if ($list->getNextOfTypeAndValue(Token::TYPE_KEYWORD, 'ON')) { // (-1) because getNextOfTypeAndValue returned ON and increased the index. $idxOfOn = $list->idx - 1; // We want to make sure that it's `ON DUPLICATE KEY UPDATE` $dubplicateToken = $list->getNext(); $keyToken = $list->getNext(); $updateToken = $list->getNext(); if ( $dubplicateToken && $dubplicateToken->keyword === 'DUPLICATE' && ($keyToken && $keyToken->keyword === 'KEY') && ($updateToken && $updateToken->keyword === 'UPDATE') ) { // Index of the last parsed token will be the token before the ON Keyword $idxOfLastParsedToken = $idxOfOn - 1; // The length of the expression tokens would be the difference // between the first unrelated token `ON` and the idx // before skipping the CTE tokens. $lengthOfExpressionTokens = $idxOfOn - $idxBeforeSearch; } } // Restore the index $list->idx = $idxBeforeSearch; $subList = new TokensList(array_slice($list->tokens, $list->idx, $lengthOfExpressionTokens)); $subParser = new Parser($subList); if (count($subParser->errors)) { foreach ($subParser->errors as $error) { $parser->errors[] = $error; } break; } $this->cteStatementParser = $subParser; $list->idx = $idxOfLastParsedToken; break; } } // 5 is the only valid end state if ($state !== 5) { /** * Token parsed at this moment. */ $token = $list->tokens[$list->idx]; $parser->error('Unexpected end of the WITH CTE.', $token); } --$list->idx; } /** * {@inheritdoc} */ public function build() { $str = 'WITH '; foreach ($this->withers as $wither) { $str .= $str === 'WITH ' ? '' : ', '; $str .= WithKeyword::build($wither); } $str .= ' '; if ($this->cteStatementParser) { foreach ($this->cteStatementParser->statements as $statement) { $str .= $statement->build(); } } return $str; } /** * Get tokens within the WITH expression to use them in another parser * * @return ParserException|TokensList */ private function getSubTokenList(TokensList $list) { $idx = $list->idx; $token = $list->tokens[$list->idx]; $openParenthesis = 0; while ($list->idx < $list->count) { if ($token->value === '(') { ++$openParenthesis; } elseif ($token->value === ')') { if (--$openParenthesis === -1) { break; } } ++$list->idx; if (! isset($list->tokens[$list->idx])) { break; } $token = $list->tokens[$list->idx]; } // performance improvement: return the error to avoid a try/catch in the loop if ($list->idx === $list->count) { --$list->idx; return new ParserException( Translator::gettext('A closing bracket was expected.'), $token ); } $length = $list->idx - $idx; return new TokensList(array_slice($list->tokens, $idx, $length), $length); } }