<?php
declare(strict_types=1);
namespace PhpMyAdmin\SqlParser\Utils;
use PhpMyAdmin\SqlParser\Context;
use function array_merge;
use function strlen;
use function substr;
use function trim;
/**
* Buffer query utilities.
*
* Implements a specialized lexer used to extract statements from large inputs
* that are being buffered. After each statement has been extracted, a lexer or
* a parser may be used.
*/
class BufferedQuery
{
// Constants that describe the current status of the parser.
// A string is being parsed.
public const STATUS_STRING = 16; // 0001 0000
public const STATUS_STRING_SINGLE_QUOTES = 17; // 0001 0001
public const STATUS_STRING_DOUBLE_QUOTES = 18; // 0001 0010
public const STATUS_STRING_BACKTICK = 20; // 0001 0100
// A comment is being parsed.
public const STATUS_COMMENT = 32; // 0010 0000
public const STATUS_COMMENT_BASH = 33; // 0010 0001
public const STATUS_COMMENT_C = 34; // 0010 0010
public const STATUS_COMMENT_SQL = 36; // 0010 0100
/**
* The query that is being processed.
*
* This field can be modified just by appending to it!
*
* @var string
*/
public $query = '';
/**
* The options of this parser.
*
* @var array<string, bool|string>
* @psalm-var array{delimiter?: non-empty-string, parse_delimiter?: bool, add_delimiter?: bool}
*/
public $options = [];
/**
* The last delimiter used.
*
* @var string
*/
public $delimiter;
/**
* The length of the delimiter.
*
* @var int
*/
public $delimiterLen;
/**
* The current status of the parser.
*
* @var int|null
*/
public $status;
/**
* The last incomplete query that was extracted.
*
* @var string
*/
public $current = '';
/**
* @param string $query the query to be parsed
* @param array<string, bool|string> $options the options of this parser
* @psalm-param array{delimiter?: non-empty-string, parse_delimiter?: bool, add_delimiter?: bool} $options
*/
public function __construct($query = '', array $options = [])
{
// Merges specified options with defaults.
$this->options = array_merge(
[
// The starting delimiter.
'delimiter' => ';',
// Whether `DELIMITER` statements should be parsed.
'parse_delimiter' => false,
// Whether a delimiter should be added at the end of the statement.
'add_delimiter' => false,
],
$options
);
$this->query = $query;
$this->setDelimiter($this->options['delimiter']);
}
/**
* Sets the delimiter.
*
* Used to update the length of it too.
*
* @param string $delimiter
*
* @return void
*/
public function setDelimiter($delimiter)
{
$this->delimiter = $delimiter;
$this->delimiterLen = strlen($delimiter);
}
/**
* Extracts a statement from the buffer.
*
* @param bool $end whether the end of the buffer was reached
*
* @return string|false
*/
public function extract($end = false)
{
/**
* The last parsed position.
*
* This is statically defined because it is not used outside anywhere
* outside this method and there is probably a (minor) performance
* improvement to it.
*
* @var int
*/
static $i = 0;
if (empty($this->query)) {
return false;
}
/**
* The length of the buffer.
*
* @var int
*/
$len = strlen($this->query);
/**
* The last index of the string that is going to be parsed.
*
* There must be a few characters left in the buffer so the parser can
* avoid confusing some symbols that may have multiple meanings.
*
* For example, if the buffer ends in `-` that may be an operator or the
* beginning of a comment.
*
* Another example if the buffer ends in `DELIMITE`. The parser is going
* to require a few more characters because that may be a part of the
* `DELIMITER` keyword or just a column named `DELIMITE`.
*
* Those extra characters are required only if there is more data
* expected (the end of the buffer was not reached).
*/
$loopLen = $end ? $len : $len - 16;
for (; $i < $loopLen; ++$i) {
/*
* Handling backslash.
*
* Even if the next character is a special character that should be
* treated differently, because of the preceding backslash, it will
* be ignored.
*/
if ((($this->status & self::STATUS_COMMENT) === 0) && ($this->query[$i] === '\\')) {
$this->current .= $this->query[$i] . ($i + 1 < $len ? $this->query[++$i] : '');
continue;
}
/*
* Handling special parses statuses.
*/
if ($this->status === self::STATUS_STRING_SINGLE_QUOTES) {
// Single-quoted strings like 'foo'.
if ($this->query[$i] === '\'') {
$this->status = 0;
}
$this->current .= $this->query[$i];
continue;
} elseif ($this->status === self::STATUS_STRING_DOUBLE_QUOTES) {
// Double-quoted strings like "bar".
if ($this->query[$i] === '"') {
$this->status = 0;
}
$this->current .= $this->query[$i];
continue;
} elseif ($this->status === self::STATUS_STRING_BACKTICK) {
if ($this->query[$i] === '`') {
$this->status = 0;
}
$this->current .= $this->query[$i];
continue;
} elseif (($this->status === self::STATUS_COMMENT_BASH) || ($this->status === self::STATUS_COMMENT_SQL)) {
// Bash-like (#) or SQL-like (-- ) comments end in new line.
if ($this->query[$i] === "\n") {
$this->status = 0;
}
$this->current .= $this->query[$i];
continue;
} elseif ($this->status === self::STATUS_COMMENT_C) {
// C-like comments end in */.
if (($this->query[$i - 1] === '*') && ($this->query[$i] === '/')) {
$this->status = 0;
}
$this->current .= $this->query[$i];
continue;
}
/*
* Checking if a string started.
*/
if ($this->query[$i] === '\'') {
$this->status = self::STATUS_STRING_SINGLE_QUOTES;
$this->current .= $this->query[$i];
continue;
}
if ($this->query[$i] === '"') {
$this->status = self::STATUS_STRING_DOUBLE_QUOTES;
$this->current .= $this->query[$i];
continue;
}
if ($this->query[$i] === '`') {
$this->status = self::STATUS_STRING_BACKTICK;
$this->current .= $this->query[$i];
continue;
}
/*
* Checking if a comment started.
*/
if ($this->query[$i] === '#') {
$this->status = self::STATUS_COMMENT_BASH;
$this->current .= $this->query[$i];
continue;
}
if ($i + 2 < $len) {
if (
($this->query[$i] === '-')
&& ($this->query[$i + 1] === '-')
&& Context::isWhitespace($this->query[$i + 2])
) {
$this->status = self::STATUS_COMMENT_SQL;
$this->current .= $this->query[$i];
continue;
}
if (($this->query[$i] === '/') && ($this->query[$i + 1] === '*') && ($this->query[$i + 2] !== '!')) {
$this->status = self::STATUS_COMMENT_C;
$this->current .= $this->query[$i];
continue;
}
}
/*
* Handling `DELIMITER` statement.
*
* The code below basically checks for
* `strtoupper(substr($this->query, $i, 9)) === 'DELIMITER'`
*
* This optimization makes the code about 3 times faster.
*
* `DELIMITER` is not being considered a keyword. The only context
* it has a special meaning is when it is the beginning of a
* statement. This is the reason for the last condition.
*/
if (
($i + 9 < $len)
&& (($this->query[$i] === 'D') || ($this->query[$i] === 'd'))
&& (($this->query[$i + 1] === 'E') || ($this->query[$i + 1] === 'e'))
&& (($this->query[$i + 2] === 'L') || ($this->query[$i + 2] === 'l'))
&& (($this->query[$i + 3] === 'I') || ($this->query[$i + 3] === 'i'))
&& (($this->query[$i + 4] === 'M') || ($this->query[$i + 4] === 'm'))
&& (($this->query[$i + 5] === 'I') || ($this->query[$i + 5] === 'i'))
&& (($this->query[$i + 6] === 'T') || ($this->query[$i + 6] === 't'))
&& (($this->query[$i + 7] === 'E') || ($this->query[$i + 7] === 'e'))
&& (($this->query[$i + 8] === 'R') || ($this->query[$i + 8] === 'r'))
&& Context::isWhitespace($this->query[$i + 9])
) {
// Saving the current index to be able to revert any parsing
// done in this block.
$iBak = $i;
$i += 9; // Skipping `DELIMITER`.
// Skipping whitespaces.
while (($i < $len) && Context::isWhitespace($this->query[$i])) {
++$i;
}
// Parsing the delimiter.
$delimiter = '';
while (($i < $len) && (! Context::isWhitespace($this->query[$i]))) {
$delimiter .= $this->query[$i++];
}
// Checking if the delimiter definition ended.
if (
($delimiter !== '')
&& (($i < $len) && Context::isWhitespace($this->query[$i])
|| (($i === $len) && $end))
) {
// Saving the delimiter.
$this->setDelimiter($delimiter);
// Whether this statement should be returned or not.
$ret = '';
if (! empty($this->options['parse_delimiter'])) {
// Appending the `DELIMITER` statement that was just
// found to the current statement.
$ret = trim(
$this->current . ' ' . substr($this->query, $iBak, $i - $iBak)
);
}
// Removing the statement that was just extracted from the
// query.
$this->query = substr($this->query, $i);
$i = 0;
// Resetting the current statement.
$this->current = '';
return $ret;
}
// Incomplete statement. Reverting
$i = $iBak;
return false;
}
/*
* Checking if the current statement finished.
*
* The first letter of the delimiter is being checked as an
* optimization. This code is almost as fast as the one above.
*
* There is no point in checking if two strings match if not even
* the first letter matches.
*/
if (
($this->query[$i] === $this->delimiter[0])
&& (($this->delimiterLen === 1)
|| (substr($this->query, $i, $this->delimiterLen) === $this->delimiter))
) {
// Saving the statement that just ended.
$ret = $this->current;
// If needed, adds a delimiter at the end of the statement.
if (! empty($this->options['add_delimiter'])) {
$ret .= $this->delimiter;
}
// Removing the statement that was just extracted from the
// query.
$this->query = substr($this->query, $i + $this->delimiterLen);
$i = 0;
// Resetting the current statement.
$this->current = '';
// Returning the statement.
return trim($ret);
}
/*
* Appending current character to current statement.
*/
$this->current .= $this->query[$i];
}
if ($end && ($i === $len)) {
// If the end of the buffer was reached, the buffer is emptied and
// the current statement that was extracted is returned.
$ret = $this->current;
// Emptying the buffer.
$this->query = '';
$i = 0;
// Resetting the current statement.
$this->current = '';
// Returning the statement.
return trim($ret);
}
return '';
}
}