# sqlglot.parser
from __future__ import annotations

import itertools
import logging
import re
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import (
    ErrorLevel,
    ParseError,
    TokenError,
    concat_messages,
    highlight_sql,
    merge_errors,
)
from sqlglot.expressions import apply_index_offset
from sqlglot.helper import ensure_list, i64, seq_get
from sqlglot.trie import new_trie
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie
from collections.abc import Sequence
from builtins import type as Type

if t.TYPE_CHECKING:
    from sqlglot.expressions import ExpOrStr
    from sqlglot._typing import E, BuilderArgs
    from sqlglot.dialects.dialect import Dialect, DialectType

    from re import Pattern

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = dict[str, Sequence[t.Union[Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: BuilderArgs) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP node; a lone star argument produces a StarMap instead."""
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Arguments alternate key, value, key, value, ... — an odd count raises IndexError,
    # matching the strict pairing semantics of the original loop.
    keys: list[ExpOrStr] = [args[i] for i in range(0, len(args), 2)]
    values: list[ExpOrStr] = [args[i + 1] for i in range(0, len(args), 2)]

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: BuilderArgs) -> exp.Escape | exp.Like:
    """Build a LIKE node; a third argument becomes an ESCAPE wrapper."""
    # Note the swapped argument order: LIKE(pattern, string) -> string LIKE pattern
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    if len(args) > 2:
        return exp.Escape(this=like, expression=seq_get(args, 2))
    return like


def binary_range_parser(
    expr_type: Type[exp.Expr], reverse_args: bool = False
) -> t.Callable[[Parser, exp.Expr | None], exp.Expr | None]:
    """Return a parser method for binary range operators (LIKE, GLOB, SIMILAR TO, ...)."""

    def _parse_binary_range(self: Parser, this: exp.Expr | None) -> exp.Expr | None:
        rhs = self._parse_bitwise()
        lhs = this
        if reverse_args:
            lhs, rhs = rhs, lhs
        node = self.expression(expr_type(this=lhs, expression=rhs))
        return self._parse_escape(node)

    return _parse_binary_range


def build_logarithm(args: BuilderArgs, dialect: Dialect) -> exp.Func:
    """Build LOG(...), normalizing the argument order to (base, expression)."""
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if not expression:
        # Single-argument LOG defaults to the natural log in some dialects
        return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)

    if not dialect.LOG_BASE_FIRST:
        this, expression = expression, this
    return exp.Log(this=this, expression=expression)


def build_hex(args: BuilderArgs, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    """Build HEX(...), honoring the dialect's output casing."""
    arg = seq_get(args, 0)
    if dialect.HEX_LOWERCASE:
        return exp.LowerHex(this=arg)
    return exp.Hex(this=arg)


def build_lower(args: BuilderArgs) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    if isinstance(arg, exp.Hex):
        return exp.LowerHex(this=arg.this)
    return exp.Lower(this=arg)


def build_upper(args: BuilderArgs) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    if isinstance(arg, exp.Hex):
        return exp.Hex(this=arg.this)
    return exp.Upper(this=arg)


def build_extract_json_with_path(
    expr_type: Type[E],
) -> t.Callable[[BuilderArgs, Dialect], E]:
    """Return a builder for JSON extraction functions whose second argument is a path."""

    def _builder(args: BuilderArgs, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        # JSON_EXTRACT may take extra path arguments beyond the first two
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])
        if expr_type is exp.JSONExtractScalar:
            expression.set("scalar_only", dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY)

        return expression

    return _builder


def build_mod(args: BuilderArgs) -> exp.Mod:
    """Build a MOD(a, b) call as the `%` operator, parenthesizing binary operands."""
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: BuilderArgs, is_left: bool = True) -> exp.Pad:
    """Build an LPAD/RPAD node; `is_left` selects the padding side."""
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: Type[E], args: list[t.Any], bracket_kind: TokenType, dialect: Dialect
) -> exp.Expr:
    """Build an ARRAY/LIST constructor, recording the bracket form where dialects differ."""
    array_exp = exp_class(expressions=args)

    # Some dialects distinguish ARRAY[...] from [...]; remember which form was parsed
    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: BuilderArgs, default_source_tz: str | None = None
) -> exp.ConvertTimezone | exp.Anonymous:
    """Build CONVERT_TIMEZONE; the two-argument form fills in `default_source_tz`."""
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: BuilderArgs, is_left: bool = True, reverse_args: bool = False) -> exp.Trim:
    """Build LTRIM/RTRIM; `reverse_args` handles dialects passing (chars, string)."""
    this, expression = seq_get(args, 0), seq_get(args, 1)

    if expression and reverse_args:
        this, expression = expression, this

    return exp.Trim(this=this, expression=expression, position="LEADING" if is_left else "TRAILING")


def build_coalesce(
    args: BuilderArgs, is_nvl: bool | None = None, is_null: bool | None = None
) -> exp.Coalesce:
    """Build COALESCE; `is_nvl`/`is_null` preserve the original function spelling."""
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: BuilderArgs) -> exp.StrPosition:
    """Build StrPosition from LOCATE/CHARINDEX-style (substr, string[, position]) args."""
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


def build_array_append(args: BuilderArgs, dialect: Dialect) -> exp.ArrayAppend:
    """
    Builds ArrayAppend with NULL propagation semantics based on the dialect configuration.

    Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL.
    Others (DuckDB, PostgreSQL) create a new single-element array instead.

    Args:
        args: Function arguments [array, element]
        dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from

    Returns:
        ArrayAppend expression with appropriate null_propagation flag
    """
    return exp.ArrayAppend(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS,
    )


def build_array_prepend(args: BuilderArgs, dialect: Dialect) -> exp.ArrayPrepend:
    """
    Builds ArrayPrepend with NULL propagation semantics based on the dialect configuration.

    Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL.
    Others (DuckDB, PostgreSQL) create a new single-element array instead.

    Args:
        args: Function arguments [array, element]
        dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from

    Returns:
        ArrayPrepend expression with appropriate null_propagation flag
    """
    return exp.ArrayPrepend(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS,
    )


def build_array_concat(args: BuilderArgs, dialect: Dialect) -> exp.ArrayConcat:
    """
    Builds ArrayConcat with NULL propagation semantics based on the dialect configuration.

    Some dialects (Redshift, Snowflake) return NULL when any input array is NULL.
235 Others (DuckDB, PostgreSQL) skip NULL arrays and continue concatenation. 236 237 Args: 238 args: Function arguments [array1, array2, ...] (variadic) 239 dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from 240 241 Returns: 242 ArrayConcat expression with appropriate null_propagation flag 243 """ 244 return exp.ArrayConcat( 245 this=seq_get(args, 0), 246 expressions=args[1:], 247 null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS, 248 ) 249 250 251def build_array_remove(args: BuilderArgs, dialect: Dialect) -> exp.ArrayRemove: 252 """ 253 Builds ArrayRemove with NULL propagation semantics based on the dialect configuration. 254 255 Some dialects (Snowflake) return NULL when the removal value is NULL. 256 Others (DuckDB) may return empty array due to NULL comparison semantics. 257 258 Args: 259 args: Function arguments [array, value_to_remove] 260 dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from 261 262 Returns: 263 ArrayRemove expression with appropriate null_propagation flag 264 """ 265 return exp.ArrayRemove( 266 this=seq_get(args, 0), 267 expression=seq_get(args, 1), 268 null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS, 269 ) 270 271 272def _resolve_dialect(dialect: DialectType) -> Dialect: 273 from sqlglot.dialects.dialect import Dialect 274 275 return Dialect.get_or_raise(dialect) 276 277 278SENTINEL_NONE: Token = Token(TokenType.SENTINEL, "SENTINEL") 279 280 281class Parser: 282 """ 283 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 284 285 Args: 286 error_level: The desired error level. 287 Default: ErrorLevel.IMMEDIATE 288 error_message_context: The amount of context to capture from a query string when displaying 289 the error message (in number of characters). 290 Default: 100 291 max_errors: Maximum number of error messages to include in a raised ParseError. 292 This is only relevant if error_level is ErrorLevel.RAISE. 
293 Default: 3 294 max_nodes: Maximum number of AST nodes to prevent memory exhaustion. 295 Set to -1 (default) to disable the check. 296 """ 297 298 __slots__ = ( 299 "error_level", 300 "error_message_context", 301 "max_errors", 302 "max_nodes", 303 "dialect", 304 "sql", 305 "errors", 306 "_tokens", 307 "_index", 308 "_curr", 309 "_next", 310 "_prev", 311 "_prev_comments", 312 "_pipe_cte_counter", 313 "_chunks", 314 "_chunk_index", 315 "_tokens_size", 316 "_node_count", 317 ) 318 319 FUNCTIONS: t.ClassVar[dict[str, t.Callable]] = { 320 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 321 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 322 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 323 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 324 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 325 ), 326 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 327 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 328 ), 329 "ARRAY_APPEND": build_array_append, 330 "ARRAY_CAT": build_array_concat, 331 "ARRAY_CONCAT": build_array_concat, 332 "ARRAY_INTERSECT": lambda args: exp.ArrayIntersect(expressions=args), 333 "ARRAY_INTERSECTION": lambda args: exp.ArrayIntersect(expressions=args), 334 "ARRAY_PREPEND": build_array_prepend, 335 "ARRAY_REMOVE": build_array_remove, 336 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 337 "CONCAT": lambda args, dialect: exp.Concat( 338 expressions=args, 339 safe=not dialect.STRICT_STRING_CONCAT, 340 coalesce=dialect.CONCAT_COALESCE, 341 ), 342 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 343 expressions=args, 344 safe=not dialect.STRICT_STRING_CONCAT, 345 coalesce=dialect.CONCAT_COALESCE, 346 ), 347 "CONVERT_TIMEZONE": build_convert_timezone, 348 "DATE_TO_DATE_STR": lambda args: exp.Cast( 349 this=seq_get(args, 0), 350 to=exp.DataType(this=exp.DType.TEXT), 351 ), 352 
"GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 353 start=seq_get(args, 0), 354 end=seq_get(args, 1), 355 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 356 ), 357 "GENERATE_UUID": lambda args, dialect: exp.Uuid( 358 is_string=dialect.UUID_IS_STRING_TYPE or None 359 ), 360 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 361 "GREATEST": lambda args, dialect: exp.Greatest( 362 this=seq_get(args, 0), 363 expressions=args[1:], 364 ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS, 365 ), 366 "LEAST": lambda args, dialect: exp.Least( 367 this=seq_get(args, 0), 368 expressions=args[1:], 369 ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS, 370 ), 371 "HEX": build_hex, 372 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 373 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 374 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 375 "JSON_KEYS": lambda args, dialect: exp.JSONKeys( 376 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 377 ), 378 "LIKE": build_like, 379 "LOG": build_logarithm, 380 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 381 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 382 "LOWER": build_lower, 383 "LPAD": lambda args: build_pad(args), 384 "LEFTPAD": lambda args: build_pad(args), 385 "LTRIM": lambda args: build_trim(args), 386 "MOD": build_mod, 387 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 388 "RPAD": lambda args: build_pad(args, is_left=False), 389 "RTRIM": lambda args: build_trim(args, is_left=False), 390 "SCOPE_RESOLUTION": lambda args: ( 391 exp.ScopeResolution(expression=seq_get(args, 0)) 392 if len(args) != 2 393 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)) 394 ), 395 "STRPOS": exp.StrPosition.from_arg_list, 396 "CHARINDEX": lambda args: 
build_locate_strposition(args), 397 "INSTR": exp.StrPosition.from_arg_list, 398 "LOCATE": lambda args: build_locate_strposition(args), 399 "TIME_TO_TIME_STR": lambda args: exp.Cast( 400 this=seq_get(args, 0), 401 to=exp.DataType(this=exp.DType.TEXT), 402 ), 403 "TO_HEX": build_hex, 404 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 405 this=exp.Cast( 406 this=seq_get(args, 0), 407 to=exp.DataType(this=exp.DType.TEXT), 408 ), 409 start=exp.Literal.number(1), 410 length=exp.Literal.number(10), 411 ), 412 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 413 "UPPER": build_upper, 414 "UUID": lambda args, dialect: exp.Uuid(is_string=dialect.UUID_IS_STRING_TYPE or None), 415 "VAR_MAP": build_var_map, 416 } 417 418 NO_PAREN_FUNCTIONS: t.ClassVar[dict] = { 419 TokenType.CURRENT_DATE: exp.CurrentDate, 420 TokenType.CURRENT_DATETIME: exp.CurrentDate, 421 TokenType.CURRENT_TIME: exp.CurrentTime, 422 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 423 TokenType.CURRENT_USER: exp.CurrentUser, 424 TokenType.CURRENT_ROLE: exp.CurrentRole, 425 } 426 427 STRUCT_TYPE_TOKENS: t.ClassVar = { 428 TokenType.NESTED, 429 TokenType.OBJECT, 430 TokenType.STRUCT, 431 TokenType.UNION, 432 } 433 434 NESTED_TYPE_TOKENS: t.ClassVar = { 435 TokenType.ARRAY, 436 TokenType.LIST, 437 TokenType.LOWCARDINALITY, 438 TokenType.MAP, 439 TokenType.NULLABLE, 440 TokenType.RANGE, 441 *STRUCT_TYPE_TOKENS, 442 } 443 444 ENUM_TYPE_TOKENS: t.ClassVar = { 445 TokenType.DYNAMIC, 446 TokenType.ENUM, 447 TokenType.ENUM8, 448 TokenType.ENUM16, 449 } 450 451 AGGREGATE_TYPE_TOKENS: t.ClassVar = { 452 TokenType.AGGREGATEFUNCTION, 453 TokenType.SIMPLEAGGREGATEFUNCTION, 454 } 455 456 TYPE_TOKENS: t.ClassVar = { 457 TokenType.BIT, 458 TokenType.BOOLEAN, 459 TokenType.TINYINT, 460 TokenType.UTINYINT, 461 TokenType.SMALLINT, 462 TokenType.USMALLINT, 463 TokenType.INT, 464 TokenType.UINT, 465 TokenType.BIGINT, 466 TokenType.UBIGINT, 467 TokenType.BIGNUM, 468 TokenType.INT128, 469 
TokenType.UINT128, 470 TokenType.INT256, 471 TokenType.UINT256, 472 TokenType.MEDIUMINT, 473 TokenType.UMEDIUMINT, 474 TokenType.FIXEDSTRING, 475 TokenType.FLOAT, 476 TokenType.DOUBLE, 477 TokenType.UDOUBLE, 478 TokenType.CHAR, 479 TokenType.NCHAR, 480 TokenType.VARCHAR, 481 TokenType.NVARCHAR, 482 TokenType.BPCHAR, 483 TokenType.TEXT, 484 TokenType.MEDIUMTEXT, 485 TokenType.LONGTEXT, 486 TokenType.BLOB, 487 TokenType.MEDIUMBLOB, 488 TokenType.LONGBLOB, 489 TokenType.BINARY, 490 TokenType.VARBINARY, 491 TokenType.JSON, 492 TokenType.JSONB, 493 TokenType.INTERVAL, 494 TokenType.TINYBLOB, 495 TokenType.TINYTEXT, 496 TokenType.TIME, 497 TokenType.TIMETZ, 498 TokenType.TIME_NS, 499 TokenType.TIMESTAMP, 500 TokenType.TIMESTAMP_S, 501 TokenType.TIMESTAMP_MS, 502 TokenType.TIMESTAMP_NS, 503 TokenType.TIMESTAMPTZ, 504 TokenType.TIMESTAMPLTZ, 505 TokenType.TIMESTAMPNTZ, 506 TokenType.DATETIME, 507 TokenType.DATETIME2, 508 TokenType.DATETIME64, 509 TokenType.SMALLDATETIME, 510 TokenType.DATE, 511 TokenType.DATE32, 512 TokenType.INT4RANGE, 513 TokenType.INT4MULTIRANGE, 514 TokenType.INT8RANGE, 515 TokenType.INT8MULTIRANGE, 516 TokenType.NUMRANGE, 517 TokenType.NUMMULTIRANGE, 518 TokenType.TSRANGE, 519 TokenType.TSMULTIRANGE, 520 TokenType.TSTZRANGE, 521 TokenType.TSTZMULTIRANGE, 522 TokenType.DATERANGE, 523 TokenType.DATEMULTIRANGE, 524 TokenType.DECIMAL, 525 TokenType.DECIMAL32, 526 TokenType.DECIMAL64, 527 TokenType.DECIMAL128, 528 TokenType.DECIMAL256, 529 TokenType.DECFLOAT, 530 TokenType.UDECIMAL, 531 TokenType.BIGDECIMAL, 532 TokenType.UUID, 533 TokenType.GEOGRAPHY, 534 TokenType.GEOGRAPHYPOINT, 535 TokenType.GEOMETRY, 536 TokenType.POINT, 537 TokenType.RING, 538 TokenType.LINESTRING, 539 TokenType.MULTILINESTRING, 540 TokenType.POLYGON, 541 TokenType.MULTIPOLYGON, 542 TokenType.HLLSKETCH, 543 TokenType.HSTORE, 544 TokenType.PSEUDO_TYPE, 545 TokenType.SUPER, 546 TokenType.SERIAL, 547 TokenType.SMALLSERIAL, 548 TokenType.BIGSERIAL, 549 TokenType.XML, 550 TokenType.YEAR, 
551 TokenType.USERDEFINED, 552 TokenType.MONEY, 553 TokenType.SMALLMONEY, 554 TokenType.ROWVERSION, 555 TokenType.IMAGE, 556 TokenType.VARIANT, 557 TokenType.VECTOR, 558 TokenType.VOID, 559 TokenType.OBJECT, 560 TokenType.OBJECT_IDENTIFIER, 561 TokenType.INET, 562 TokenType.IPADDRESS, 563 TokenType.IPPREFIX, 564 TokenType.IPV4, 565 TokenType.IPV6, 566 TokenType.UNKNOWN, 567 TokenType.NOTHING, 568 TokenType.NULL, 569 TokenType.NAME, 570 TokenType.TDIGEST, 571 TokenType.DYNAMIC, 572 *ENUM_TYPE_TOKENS, 573 *NESTED_TYPE_TOKENS, 574 *AGGREGATE_TYPE_TOKENS, 575 } 576 577 SIGNED_TO_UNSIGNED_TYPE_TOKEN: t.ClassVar = { 578 TokenType.BIGINT: TokenType.UBIGINT, 579 TokenType.INT: TokenType.UINT, 580 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 581 TokenType.SMALLINT: TokenType.USMALLINT, 582 TokenType.TINYINT: TokenType.UTINYINT, 583 TokenType.DECIMAL: TokenType.UDECIMAL, 584 TokenType.DOUBLE: TokenType.UDOUBLE, 585 } 586 587 SUBQUERY_PREDICATES: t.ClassVar = { 588 TokenType.ANY: exp.Any, 589 TokenType.ALL: exp.All, 590 TokenType.EXISTS: exp.Exists, 591 TokenType.SOME: exp.Any, 592 } 593 594 SUBQUERY_TOKENS: t.ClassVar = { 595 TokenType.SELECT, 596 TokenType.WITH, 597 TokenType.FROM, 598 } 599 600 RESERVED_TOKENS: t.ClassVar = { 601 *Tokenizer.SINGLE_TOKENS.values(), 602 TokenType.SELECT, 603 } - {TokenType.IDENTIFIER} 604 605 DB_CREATABLES: t.ClassVar = { 606 TokenType.DATABASE, 607 TokenType.DICTIONARY, 608 TokenType.FILE_FORMAT, 609 TokenType.MODEL, 610 TokenType.NAMESPACE, 611 TokenType.SCHEMA, 612 TokenType.SEMANTIC_VIEW, 613 TokenType.SEQUENCE, 614 TokenType.SINK, 615 TokenType.SOURCE, 616 TokenType.STAGE, 617 TokenType.STORAGE_INTEGRATION, 618 TokenType.STREAMLIT, 619 TokenType.TABLE, 620 TokenType.TAG, 621 TokenType.VIEW, 622 TokenType.WAREHOUSE, 623 } 624 625 CREATABLES: t.ClassVar = { 626 TokenType.COLUMN, 627 TokenType.CONSTRAINT, 628 TokenType.FOREIGN_KEY, 629 TokenType.FUNCTION, 630 TokenType.INDEX, 631 TokenType.PROCEDURE, 632 TokenType.TRIGGER, 633 
*DB_CREATABLES, 634 } 635 636 TRIGGER_EVENTS: t.ClassVar = { 637 TokenType.INSERT, 638 TokenType.UPDATE, 639 TokenType.DELETE, 640 TokenType.TRUNCATE, 641 } 642 643 ALTERABLES: t.ClassVar = { 644 TokenType.INDEX, 645 TokenType.TABLE, 646 TokenType.VIEW, 647 TokenType.SESSION, 648 } 649 650 # Tokens that can represent identifiers 651 ID_VAR_TOKENS: t.ClassVar[set] = { 652 TokenType.ALL, 653 TokenType.ANALYZE, 654 TokenType.ATTACH, 655 TokenType.VAR, 656 TokenType.ANTI, 657 TokenType.APPLY, 658 TokenType.ASC, 659 TokenType.ASOF, 660 TokenType.AUTO_INCREMENT, 661 TokenType.BEGIN, 662 TokenType.BPCHAR, 663 TokenType.CACHE, 664 TokenType.CASE, 665 TokenType.COLLATE, 666 TokenType.COMMAND, 667 TokenType.COMMENT, 668 TokenType.COMMIT, 669 TokenType.CONSTRAINT, 670 TokenType.COPY, 671 TokenType.CUBE, 672 TokenType.CURRENT_SCHEMA, 673 TokenType.DEFAULT, 674 TokenType.DELETE, 675 TokenType.DESC, 676 TokenType.DESCRIBE, 677 TokenType.DETACH, 678 TokenType.DICTIONARY, 679 TokenType.DIV, 680 TokenType.END, 681 TokenType.EXECUTE, 682 TokenType.EXPORT, 683 TokenType.ESCAPE, 684 TokenType.FALSE, 685 TokenType.FIRST, 686 TokenType.FILE, 687 TokenType.FILTER, 688 TokenType.FINAL, 689 TokenType.FORMAT, 690 TokenType.FULL, 691 TokenType.GET, 692 TokenType.IDENTIFIER, 693 TokenType.INOUT, 694 TokenType.IS, 695 TokenType.ISNULL, 696 TokenType.INTERVAL, 697 TokenType.KEEP, 698 TokenType.KILL, 699 TokenType.LEFT, 700 TokenType.LIMIT, 701 TokenType.LOAD, 702 TokenType.LOCK, 703 TokenType.MATCH, 704 TokenType.MERGE, 705 TokenType.NATURAL, 706 TokenType.NEXT, 707 TokenType.OFFSET, 708 TokenType.OPERATOR, 709 TokenType.ORDINALITY, 710 TokenType.OVER, 711 TokenType.OVERLAPS, 712 TokenType.OVERWRITE, 713 TokenType.PARTITION, 714 TokenType.PERCENT, 715 TokenType.PIVOT, 716 TokenType.PRAGMA, 717 TokenType.PUT, 718 TokenType.RANGE, 719 TokenType.RECURSIVE, 720 TokenType.REFERENCES, 721 TokenType.REFRESH, 722 TokenType.RENAME, 723 TokenType.REPLACE, 724 TokenType.RIGHT, 725 TokenType.ROLLUP, 726 
TokenType.ROW, 727 TokenType.ROWS, 728 TokenType.SEMI, 729 TokenType.SET, 730 TokenType.SETTINGS, 731 TokenType.SHOW, 732 TokenType.STREAM, 733 TokenType.STREAMLIT, 734 TokenType.TEMPORARY, 735 TokenType.TOP, 736 TokenType.TRUE, 737 TokenType.TRUNCATE, 738 TokenType.UNIQUE, 739 TokenType.UNNEST, 740 TokenType.UNPIVOT, 741 TokenType.UPDATE, 742 TokenType.USE, 743 TokenType.VOLATILE, 744 TokenType.WINDOW, 745 TokenType.CURRENT_CATALOG, 746 TokenType.LOCALTIME, 747 TokenType.LOCALTIMESTAMP, 748 TokenType.SESSION_USER, 749 TokenType.STRAIGHT_JOIN, 750 *ALTERABLES, 751 *CREATABLES, 752 *SUBQUERY_PREDICATES, 753 *TYPE_TOKENS, 754 *NO_PAREN_FUNCTIONS, 755 } - {TokenType.UNION} 756 757 TABLE_ALIAS_TOKENS: t.ClassVar[set] = ID_VAR_TOKENS - { 758 TokenType.ANTI, 759 TokenType.ASOF, 760 TokenType.FULL, 761 TokenType.LEFT, 762 TokenType.LOCK, 763 TokenType.NATURAL, 764 TokenType.RIGHT, 765 TokenType.SEMI, 766 TokenType.WINDOW, 767 } 768 769 ALIAS_TOKENS: t.ClassVar = ID_VAR_TOKENS 770 771 COLON_PLACEHOLDER_TOKENS: t.ClassVar = ID_VAR_TOKENS 772 773 ARRAY_CONSTRUCTORS: t.ClassVar = { 774 "ARRAY": exp.Array, 775 "LIST": exp.List, 776 } 777 778 COMMENT_TABLE_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - {TokenType.IS} 779 780 UPDATE_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - {TokenType.SET} 781 782 TRIM_TYPES: t.ClassVar = {"LEADING", "TRAILING", "BOTH"} 783 784 # Tokens that indicate a simple column reference 785 IDENTIFIER_TOKENS: t.ClassVar[frozenset] = frozenset({TokenType.VAR, TokenType.IDENTIFIER}) 786 787 BRACKETS: t.ClassVar[frozenset] = frozenset({TokenType.L_BRACKET, TokenType.L_BRACE}) 788 789 # Postfix tokens that prevent the bare column fast path 790 COLUMN_POSTFIX_TOKENS: t.ClassVar[frozenset] = frozenset( 791 { 792 TokenType.L_PAREN, 793 TokenType.L_BRACKET, 794 TokenType.L_BRACE, 795 TokenType.COLON, 796 TokenType.JOIN_MARKER, 797 } 798 ) 799 800 TABLE_POSTFIX_TOKENS: t.ClassVar[frozenset] = frozenset( 801 { 802 TokenType.L_PAREN, 803 TokenType.L_BRACKET, 
804 TokenType.L_BRACE, 805 TokenType.PIVOT, 806 TokenType.UNPIVOT, 807 TokenType.TABLE_SAMPLE, 808 } 809 ) 810 811 FUNC_TOKENS: t.ClassVar = { 812 TokenType.COLLATE, 813 TokenType.COMMAND, 814 TokenType.CURRENT_DATE, 815 TokenType.CURRENT_DATETIME, 816 TokenType.CURRENT_SCHEMA, 817 TokenType.CURRENT_TIMESTAMP, 818 TokenType.CURRENT_TIME, 819 TokenType.CURRENT_USER, 820 TokenType.CURRENT_CATALOG, 821 TokenType.FILTER, 822 TokenType.FIRST, 823 TokenType.FORMAT, 824 TokenType.GET, 825 TokenType.GLOB, 826 TokenType.IDENTIFIER, 827 TokenType.INDEX, 828 TokenType.ISNULL, 829 TokenType.ILIKE, 830 TokenType.INSERT, 831 TokenType.LIKE, 832 TokenType.LOCALTIME, 833 TokenType.LOCALTIMESTAMP, 834 TokenType.MERGE, 835 TokenType.NEXT, 836 TokenType.OFFSET, 837 TokenType.PRIMARY_KEY, 838 TokenType.RANGE, 839 TokenType.REPLACE, 840 TokenType.RLIKE, 841 TokenType.ROW, 842 TokenType.SESSION_USER, 843 TokenType.UNNEST, 844 TokenType.VAR, 845 TokenType.LEFT, 846 TokenType.RIGHT, 847 TokenType.SEQUENCE, 848 TokenType.DATE, 849 TokenType.DATETIME, 850 TokenType.TABLE, 851 TokenType.TIMESTAMP, 852 TokenType.TIMESTAMPTZ, 853 TokenType.TRUNCATE, 854 TokenType.UTC_DATE, 855 TokenType.UTC_TIME, 856 TokenType.UTC_TIMESTAMP, 857 TokenType.WINDOW, 858 TokenType.XOR, 859 *TYPE_TOKENS, 860 *SUBQUERY_PREDICATES, 861 } 862 863 CONJUNCTION: t.ClassVar[dict[TokenType, type[exp.Expr]]] = { 864 TokenType.AND: exp.And, 865 } 866 867 ASSIGNMENT: t.ClassVar[dict[TokenType, type[exp.Expr]]] = { 868 TokenType.COLON_EQ: exp.PropertyEQ, 869 } 870 871 DISJUNCTION: t.ClassVar[dict[TokenType, type[exp.Expr]]] = { 872 TokenType.OR: exp.Or, 873 } 874 875 EQUALITY: t.ClassVar = { 876 TokenType.EQ: exp.EQ, 877 TokenType.NEQ: exp.NEQ, 878 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 879 } 880 881 COMPARISON: t.ClassVar = { 882 TokenType.GT: exp.GT, 883 TokenType.GTE: exp.GTE, 884 TokenType.LT: exp.LT, 885 TokenType.LTE: exp.LTE, 886 } 887 888 BITWISE: t.ClassVar = { 889 TokenType.AMP: exp.BitwiseAnd, 890 TokenType.CARET: 
exp.BitwiseXor, 891 TokenType.PIPE: exp.BitwiseOr, 892 } 893 894 TERM: t.ClassVar = { 895 TokenType.DASH: exp.Sub, 896 TokenType.PLUS: exp.Add, 897 TokenType.MOD: exp.Mod, 898 TokenType.COLLATE: exp.Collate, 899 } 900 901 FACTOR: t.ClassVar = { 902 TokenType.DIV: exp.IntDiv, 903 TokenType.LR_ARROW: exp.Distance, 904 TokenType.SLASH: exp.Div, 905 TokenType.STAR: exp.Mul, 906 } 907 908 EXPONENT: t.ClassVar[dict[TokenType, type[exp.Expr]]] = {} 909 910 TIMES: t.ClassVar = { 911 TokenType.TIME, 912 TokenType.TIMETZ, 913 } 914 915 TIMESTAMPS: t.ClassVar = { 916 TokenType.TIMESTAMP, 917 TokenType.TIMESTAMPNTZ, 918 TokenType.TIMESTAMPTZ, 919 TokenType.TIMESTAMPLTZ, 920 *TIMES, 921 } 922 923 SET_OPERATIONS: t.ClassVar = { 924 TokenType.UNION, 925 TokenType.INTERSECT, 926 TokenType.EXCEPT, 927 } 928 929 JOIN_METHODS: t.ClassVar = { 930 TokenType.ASOF, 931 TokenType.NATURAL, 932 TokenType.POSITIONAL, 933 } 934 935 JOIN_SIDES: t.ClassVar = { 936 TokenType.LEFT, 937 TokenType.RIGHT, 938 TokenType.FULL, 939 } 940 941 JOIN_KINDS: t.ClassVar = { 942 TokenType.ANTI, 943 TokenType.CROSS, 944 TokenType.INNER, 945 TokenType.OUTER, 946 TokenType.SEMI, 947 TokenType.STRAIGHT_JOIN, 948 } 949 950 JOIN_HINTS: t.ClassVar[set[str]] = set() 951 952 # Tokens that unambiguously end a table reference on the fast path 953 TABLE_TERMINATORS: t.ClassVar[frozenset] = frozenset( 954 { 955 TokenType.COMMA, 956 TokenType.GROUP_BY, 957 TokenType.HAVING, 958 TokenType.JOIN, 959 TokenType.LIMIT, 960 TokenType.ON, 961 TokenType.ORDER_BY, 962 TokenType.R_PAREN, 963 TokenType.SEMICOLON, 964 TokenType.SENTINEL, 965 TokenType.WHERE, 966 *SET_OPERATIONS, 967 *JOIN_KINDS, 968 *JOIN_METHODS, 969 *JOIN_SIDES, 970 } 971 ) 972 973 LAMBDAS: t.ClassVar = { 974 TokenType.ARROW: lambda self, expressions: self.expression( 975 exp.Lambda( 976 this=self._replace_lambda( 977 self._parse_disjunction(), 978 expressions, 979 ), 980 expressions=expressions, 981 ) 982 ), 983 TokenType.FARROW: lambda self, expressions: 
self.expression( 984 exp.Kwarg(this=exp.var(expressions[0].name), expression=self._parse_disjunction()) 985 ), 986 } 987 988 # Whether lambda args include type annotations, e.g. TRANSFORM(arr, x INT -> x + 1) in Snowflake 989 TYPED_LAMBDA_ARGS: t.ClassVar[bool] = False 990 991 LAMBDA_ARG_TERMINATORS: t.ClassVar[frozenset] = frozenset({TokenType.COMMA, TokenType.R_PAREN}) 992 993 COLUMN_OPERATORS: t.ClassVar = { 994 TokenType.DOT: None, 995 TokenType.DOTCOLON: lambda self, this, to: self.expression(exp.JSONCast(this=this, to=to)), 996 TokenType.DCOLON: lambda self, this, to: self.build_cast( 997 strict=self.STRICT_CAST, this=this, to=to 998 ), 999 TokenType.ARROW: lambda self, this, path: self.expression( 1000 exp.JSONExtract( 1001 this=this, 1002 expression=self.dialect.to_json_path(path), 1003 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 1004 ) 1005 ), 1006 TokenType.DARROW: lambda self, this, path: self.expression( 1007 exp.JSONExtractScalar( 1008 this=this, 1009 expression=self.dialect.to_json_path(path), 1010 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 1011 scalar_only=self.dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY, 1012 ) 1013 ), 1014 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 1015 exp.JSONBExtract(this=this, expression=path) 1016 ), 1017 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 1018 exp.JSONBExtractScalar(this=this, expression=path) 1019 ), 1020 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 1021 exp.JSONBContains(this=this, expression=key) 1022 ), 1023 } 1024 1025 CAST_COLUMN_OPERATORS: t.ClassVar = { 1026 TokenType.DOTCOLON, 1027 TokenType.DCOLON, 1028 } 1029 1030 EXPRESSION_PARSERS: t.ClassVar = { 1031 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1032 exp.Column: lambda self: self._parse_column(), 1033 exp.ColumnDef: lambda self: self._parse_column_def(self._parse_column()), 1034 exp.Condition: lambda self: self._parse_disjunction(), 1035 
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expr: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.GrantPrincipal: lambda self: self._parse_grant_principal(),
        exp.GrantPrivilege: lambda self: self._parse_grant_privilege(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
    }

    # Dispatch table: maps the leading token of a statement to the method
    # that parses that statement kind.
    STATEMENT_PARSERS: t.ClassVar = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.REVOKE: lambda self: self._parse_revoke(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma(this=self._parse_expression())),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    # Prefix (unary) operator parsers.
    UNARY_PARSERS: t.ClassVar = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not(this=self._parse_equality())),
        TokenType.TILDE: lambda self: self.expression(exp.BitwiseNot(this=self._parse_unary())),
        TokenType.DASH: lambda self: self.expression(exp.Neg(this=self._parse_unary())),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt(this=self._parse_unary())),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt(this=self._parse_unary())),
    }

    # String-literal token parsers; each callable receives the parser and the token.
    STRING_PARSERS: t.ClassVar = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString(this=token.text), token
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National(this=token.text), token
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(
            exp.RawString(this=token.text), token
        ),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal(this=token.text, is_string=True), token
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString(
                this=token.text, escape=self._match_text_seq("UESCAPE") and self._parse_string()
            ),
            token,
        ),
    }

    # Numeric / binary literal token parsers.
    NUMERIC_PARSERS: t.ClassVar = {
        TokenType.BIT_STRING: lambda self, token: self.expression(
            exp.BitString(this=token.text), token
        ),
        TokenType.BYTE_STRING: lambda self, token: self.expression(
            exp.ByteString(
                this=token.text, is_bytes=self.dialect.BYTE_STRING_IS_BYTES_TYPE or None
            ),
            token,
        ),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString(
                this=token.text, is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None
            ),
            token,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal(this=token.text, is_string=False), token
        ),
    }

    # Primary (leaf) expression parsers: all literal parsers plus a few special tokens.
    PRIMARY_PARSERS: t.ClassVar = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null()),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean(this=True)),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean(this=False)),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    # Bind-parameter / placeholder parsers.
    PLACEHOLDER_PARSERS: t.ClassVar = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder()),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder(this=self._prev.text))
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    # Binary range/predicate operators (BETWEEN, IN, LIKE, ...); each callable
    # receives the already-parsed left-hand side as `this`.
    RANGE_PARSERS: t.ClassVar = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
        TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys),
        TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys),
        TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath),
        TokenType.ADJACENT: binary_range_parser(exp.Adjacent),
        TokenType.OPERATOR: lambda self, this: self._parse_operator(this),
        TokenType.AMP_LT: binary_range_parser(exp.ExtendsLeft),
        TokenType.AMP_GT: binary_range_parser(exp.ExtendsRight),
    }

    # Pipe-syntax operator parsers; each callable transforms the query built so far.
    PIPE_SYNTAX_TRANSFORM_PARSERS: t.ClassVar = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "DISTINCT": lambda self, query: self._advance() or query.distinct(copy=False),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }

    # Keyword-driven parsers for properties appearing in DDL statements.
    PROPERTY_PARSERS: t.ClassVar[dict[str, t.Callable]] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty(expressions=self._parse_csv(self._parse_primary))
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty(this=self._parse_var(any_token=True))
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty(this=exp.Literal.string("IMMUTABLE"))
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty()),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty()),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty(expressions=self._parse_wrapped_csv(self._parse_assignment))
        ),
        "HANDLER": lambda self: self._parse_property_assignment(exp.HandlerProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty()),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty()),
        "HEAP": lambda self: self.expression(exp.HeapProperty()),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty()),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty(this=exp.Literal.string("IMMUTABLE"))
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty(expressions=self._parse_wrapped_csv(self._parse_table))
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty(this=self._parse_schema())),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty()),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty(multi=True)),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty(this=self._parse_schema())),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty()),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty()),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty(this=self._match_text_seq("BY") and self._parse_bitwise())
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty()),
        "SECURITY": lambda self: self._parse_sql_security(),
        "SQL SECURITY": lambda self: self._parse_sql_security(),
        "SET": lambda self: self.expression(exp.SetProperty(multi=False)),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty(this=exp.Literal.string("STABLE"))
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty()),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty()),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty()),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty(expressions=self._parse_wrapped_csv(self._parse_expression))
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty()),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Keyword-driven parsers for column / table constraints.
    CONSTRAINT_PARSERS: t.ClassVar = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint(not_=False)),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint(this=self._parse_var_or_string())
        ),
        "CHECK": lambda self: self._parse_check_constraint(),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint(this=self._parse_identifier() or self._parse_column())
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint(this=self._parse_string())
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint(this=self._parse_wrapped_csv(self._parse_ordered))
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint(this=self._parse_wrapped_csv(self._parse_ordered))
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint(this=self._parse_bitwise())
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint(this=self._parse_var())),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint(this=self._parse_bitwise())
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint(this=self._parse_index_params())
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint(this=self._parse_var_or_string())
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint(allow_null=True)),
        "ON": lambda self: (
            (
                self._match(TokenType.UPDATE)
                and
                self.expression(exp.OnUpdateColumnConstraint(this=self._parse_function()))
            )
            or self.expression(exp.OnProperty(this=self._parse_id_var()))
        ),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint(this=self._parse_string())),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint(this=self._parse_var_or_string())
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL(expressions=[self._parse_bitwise()])),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint()),
        "WITH": lambda self: self.expression(
            exp.Properties(expressions=self._parse_wrapped_properties())
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

    def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expr | None:
        """Parse a BUCKET(...) / TRUNCATE(...) partition transform.

        Invoked after the BUCKET or TRUNCATE keyword has been consumed
        (it is available as ``self._prev``). Returns ``None`` — after
        rewinding the cursor by one token — when the keyword is not
        followed by a parenthesized argument list, so the caller can
        re-parse the keyword as a plain identifier instead.
        """
        if not self._match(TokenType.L_PAREN, advance=False):
            # Partitioning by bucket or truncate follows the syntax:
            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
            # If we don't have parenthesis after each keyword, we should instead parse this as an identifier
            self._retreat(self._index - 1)
            return None

        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass(this=this, expression=expression))

    # Keyword-driven parsers for ALTER TABLE actions.
    ALTER_PARSERS: t.ClassVar = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete(where=self._parse_where())),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable(this=self._match(TokenType.WITH) and self._parse_table(schema=True))
        ),
    }

    # Sub-parsers for ALTER TABLE ... ALTER <keyword> actions.
    ALTER_ALTER_PARSERS: t.ClassVar =
    {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    # Constraint keywords that may appear in a schema definition without a name.
    SCHEMA_UNNAMED_CONSTRAINTS: t.ClassVar = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "BUCKET",
        "TRUNCATE",
    }

    # Function-like keywords whose arguments are parsed without parentheses.
    NO_PAREN_FUNCTION_PARSERS: t.ClassVar = {
        "ANY": lambda self: self.expression(exp.Any(this=self._parse_bitwise())),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot(this=self._parse_column())
        ),
        "IF": lambda self: self._parse_if(),
    }

    # Tokens that cannot serve as a function name.
    INVALID_FUNC_NAME_TOKENS: t.ClassVar = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS: t.ClassVar = {"STRUCT"}

    KEY_VALUE_DEFINITIONS: t.ClassVar = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    # Functions with non-standard argument syntax that need dedicated parsers.
    FUNCTION_PARSERS: t.ClassVar[dict[str, t.Callable]] = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "CHAR": lambda self: self._parse_char(),
        "CHR": lambda self: self._parse_char(),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "INITCAP": lambda self: self._parse_initcap(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self._parse_xml_element(),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    # Query-modifier clause parsers; each returns a (modifier key, parsed node) pair.
    QUERY_MODIFIER_PARSERS: t.ClassVar = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample",
self._parse_table_sample(as_modifier=True)), 1522 TokenType.CLUSTER_BY: lambda self: ( 1523 "cluster", 1524 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1525 ), 1526 TokenType.DISTRIBUTE_BY: lambda self: ( 1527 "distribute", 1528 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1529 ), 1530 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1531 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1532 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1533 } 1534 QUERY_MODIFIER_TOKENS: t.ClassVar = set(QUERY_MODIFIER_PARSERS) 1535 1536 SET_PARSERS: t.ClassVar = { 1537 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1538 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1539 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1540 "TRANSACTION": lambda self: self._parse_set_transaction(), 1541 } 1542 1543 SHOW_PARSERS: t.ClassVar[dict[str, t.Callable]] = {} 1544 1545 TYPE_LITERAL_PARSERS: t.ClassVar = { 1546 exp.DType.JSON: lambda self, this, _: self.expression(exp.ParseJSON(this=this)), 1547 } 1548 1549 TYPE_CONVERTERS: t.ClassVar[dict[exp.DType, t.Callable[[exp.DataType], exp.DataType]]] = {} 1550 1551 DDL_SELECT_TOKENS: t.ClassVar = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1552 1553 PRE_VOLATILE_TOKENS: t.ClassVar = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1554 1555 TRANSACTION_KIND: t.ClassVar = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1556 TRANSACTION_CHARACTERISTICS: t.ClassVar[OPTIONS_TYPE] = { 1557 "ISOLATION": ( 1558 ("LEVEL", "REPEATABLE", "READ"), 1559 ("LEVEL", "READ", "COMMITTED"), 1560 ("LEVEL", "READ", "UNCOMITTED"), 1561 ("LEVEL", "SERIALIZABLE"), 1562 ), 1563 "READ": ("WRITE", "ONLY"), 1564 } 1565 1566 CONFLICT_ACTIONS: t.ClassVar[OPTIONS_TYPE] = { 1567 **dict.fromkeys(("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()), 1568 "DO": ("NOTHING", 
        "UPDATE"),
    }

    TRIGGER_TIMING: t.ClassVar[OPTIONS_TYPE] = {
        "INSTEAD": (("OF",),),
        "BEFORE": tuple(),
        "AFTER": tuple(),
    }

    TRIGGER_DEFERRABLE: t.ClassVar[OPTIONS_TYPE] = {
        "NOT": (("DEFERRABLE",),),
        "DEFERRABLE": tuple(),
    }

    # Keyword options accepted in CREATE SEQUENCE statements.
    CREATE_SEQUENCE: t.ClassVar[OPTIONS_TYPE] = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: t.ClassVar[OPTIONS_TYPE] = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: t.ClassVar[OPTIONS_TYPE] = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {}

    EXECUTE_AS_OPTIONS: t.ClassVar[OPTIONS_TYPE] = dict.fromkeys(
        ("CALLER", "SELF", "OWNER"), tuple()
    )

    KEY_CONSTRAINT_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES: t.ClassVar = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS: t.ClassVar = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX: t.ClassVar = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND: t.ClassVar = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS: t.ClassVar = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS: t.ClassVar = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS: t.ClassVar = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES: t.ClassVar = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS: t.ClassVar = ID_VAR_TOKENS - {TokenType.RANGE, TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS: t.ClassVar = {TokenType.OVER}
    WINDOW_SIDES: t.ClassVar = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS: t.ClassVar = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS: t.ClassVar = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS: t.ClassVar = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS: t.ClassVar = {TokenType.DISTINCT}

    UNNEST_OFFSET_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS: t.ClassVar = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS: t.ClassVar = {
        "FILE_FORMAT",
        "COPY_OPTIONS",
        "FORMAT_OPTIONS",
        "CREDENTIAL",
    }

    IS_JSON_PREDICATE_KIND: t.ClassVar = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS: t.ClassVar[dict[str, type[exp.Expr]]] = {}

    ON_CONDITION_TOKENS: t.ClassVar = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS: t.ClassVar = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES: t.ClassVar = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    SET_ASSIGNMENT_DELIMITERS: t.ClassVar = {"=", ":=", "TO"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES: t.ClassVar = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    # Keyword-driven parsers for the expression part of ANALYZE statements.
    ANALYZE_EXPRESSION_PARSERS: t.ClassVar = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS: t.ClassVar = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS: t.ClassVar = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.ClassVar[set[str]] = set()

    RECURSIVE_CTE_SEARCH_KIND: t.ClassVar = {"BREADTH", "DEPTH", "CYCLE"}

    SECURITY_PROPERTY_KEYWORDS: t.ClassVar = {"DEFINER", "INVOKER", "NONE"}

    MODIFIABLES: t.ClassVar = (exp.Query, exp.Table, exp.TableFromRows, exp.Values)

    # Dialect-behavior flags; subclasses override these to match their dialect.
    STRICT_CAST: t.ClassVar = True

    PREFIXED_PIVOT_COLUMNS: t.ClassVar = False
    IDENTIFY_PIVOT_STRINGS: t.ClassVar = False

    LOG_DEFAULTS_TO_LN: t.ClassVar = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV: t.ClassVar = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.ClassVar[str | None] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER: t.ClassVar = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST: t.ClassVar = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES: t.ClassVar = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP: t.ClassVar = True
    SET_OP_MODIFIERS: t.ClassVar = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS: t.ClassVar = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE: t.ClassVar = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT: t.ClassVar = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN: t.ClassVar = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST: t.ClassVar = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS: t.ClassVar = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION: t.ClassVar = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT: t.ClassVar = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE: t.ClassVar = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN: t.ClassVar = True

    # Whether Alter statements are allowed to contain Partition specifications
    ALTER_TABLE_PARTITIONS: t.ClassVar = False

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE: t.ClassVar = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR: t.ClassVar = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: t.ClassVar[bool] = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g this
    # is true for Snowflake but not for BigQuery which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION: t.ClassVar[bool] = False

    # Dialects like Databricks support JOINS without join criteria
    # Adding an ON TRUE, makes transpilation semantically correct for other dialects
    ADD_JOIN_ON_TRUE: t.ClassVar[bool] = False

    # Whether INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]'
    # can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND`
    SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT: t.ClassVar[bool] = False

    # Tries built from the multi-word SHOW / SET parser keys, used for longest-prefix matching
    SHOW_TRIE: t.ClassVar[dict] = new_trie(key.split(" ") for key in SHOW_PARSERS)
    SET_TRIE: t.ClassVar[dict] = new_trie(key.split(" ") for key in SET_PARSERS)

    def __init__(
        self,
        error_level: ErrorLevel | None = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        max_nodes: int = -1,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: How parse errors are surfaced (defaults to ErrorLevel.IMMEDIATE).
            error_message_context: Number of characters of SQL context shown in error messages.
            max_errors: Maximum number of error messages concatenated when raising (RAISE level).
            max_nodes: Maximum number of AST nodes allowed; -1 disables the limit.
            dialect: The dialect (name or instance) whose settings drive parsing.
        """
        self.error_level: ErrorLevel = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context: int = error_message_context
        self.max_errors: int = max_errors
        self.max_nodes: int = max_nodes
        self.dialect: t.Any = _resolve_dialect(dialect)
        # Mutable per-parse state; reset() restores all of it to these initial values.
        self.sql: str = ""
        self.errors: list[ParseError] = []
        self._tokens: list[Token] = []
        self._tokens_size: i64 = 0
        self._index: i64 = 0
        # SENTINEL_NONE stands in for "no token" so token attributes can be read unconditionally
        self._curr: Token = SENTINEL_NONE
        self._next: Token = SENTINEL_NONE
        self._prev: Token = SENTINEL_NONE
        self._prev_comments: list[str] = []
        self._pipe_cte_counter: int = 0
        self._chunks: list[list[Token]] = []
        self._chunk_index: i64 = 0
        self._node_count: int = 0

    def reset(self) -> None:
        """Restore all per-parse state so the parser instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._tokens_size = 0
        self._index = 0
        self._curr = SENTINEL_NONE
        self._next = SENTINEL_NONE
        self._prev = SENTINEL_NONE
        self._prev_comments = []
        self._pipe_cte_counter = 0
        self._chunks = []
        self._chunk_index = 0
        self._node_count = 0

    def _advance(self, times: i64 = 1) -> None:
        """Move the token cursor forward by `times`, refreshing _curr/_next/_prev."""
        index = self._index + times
        self._index = index
        tokens = self._tokens
        size = self._tokens_size
        self._curr = tokens[index] if index < size else SENTINEL_NONE
        self._next = tokens[index + 1] if index + 1 < size else SENTINEL_NONE

        if index > 0:
            prev = tokens[index - 1]
            self._prev = prev
            # Comments attached to the previous token are buffered so they can be
            # transferred onto the next expression built (see _add_comments).
            self._prev_comments = prev.comments
        else:
            self._prev = SENTINEL_NONE
            self._prev_comments = []

    def _advance_chunk(self) -> None:
        """Switch the cursor to the next statement chunk and position it on its first token."""
        self._index = -1
        self._tokens = self._chunks[self._chunk_index]
        self._tokens_size = i64(len(self._tokens))
        self._chunk_index += 1
        self._advance()

    def _retreat(self, index: i64) -> None:
        """Move the cursor back (or forward) to an absolute token index."""
        if index != self._index:
            self._advance(index - self._index)

    def _add_comments(self, expression: exp.Expr | None) -> None:
        """Attach any buffered token comments to `expression` and clear the buffer."""
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = []

    def _match(
        self, token_type: TokenType, advance: bool = True, expression: exp.Expr | None = None
    ) -> bool:
        """Return True if the current token has `token_type`, optionally consuming it."""
        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True
        return False

    def _match_set(self, types: t.Collection[TokenType], advance: bool = True) -> bool:
        """Return True if the current token's type is in `types`, optionally consuming it."""
        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True
        return False

    def _match_pair(
        self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True
    ) -> bool:
        """Return True if the next two tokens match the given types, optionally consuming both."""
        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True
        return False
    def _match_texts(self, texts: t.Collection[str], advance: bool = True) -> bool:
        """Return True if the current token's upper-cased text is in `texts`, optionally consuming it.

        String tokens never match, so quoted literals are not mistaken for keywords.
        """
        if self._curr.token_type != TokenType.STRING and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts: str, advance: bool = True) -> bool:
        """Return True if the upcoming tokens spell out `texts` in order (case-insensitively).

        On a partial match the cursor is restored; with advance=False the cursor is
        restored even on success, making this a pure lookahead.
        """
        index = self._index
        string_type = TokenType.STRING
        for text in texts:
            if self._curr.token_type != string_type and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    def _is_connected(self) -> bool:
        """Return True if the previous and current tokens are adjacent in the SQL text."""
        prev = self._prev
        curr = self._curr
        return bool(prev and curr and prev.end + 1 == curr.start)

    def _find_sql(self, start: Token, end: Token) -> str:
        """Return the slice of the original SQL spanned by the `start`..`end` tokens."""
        return self.sql[start.start : end.end + 1]

    def raise_error(self, message: str, token: Token = SENTINEL_NONE) -> None:
        """Record a ParseError at `token` (default: current position), raising it immediately
        when the error level is IMMEDIATE; otherwise it is appended to self.errors.
        """
        token = token or self._curr or self._prev or Token.string("")
        formatted_sql, start_context, highlight, end_context = highlight_sql(
            sql=self.sql,
            positions=[(token.start, token.end)],
            context_length=self.error_message_context,
        )
        formatted_message = f"{message}. Line {token.line}, Col: {token.col}.\n  {formatted_sql}"

        error = ParseError.new(
            formatted_message,
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def validate_expression(self, expression: E, args: list | None = None) -> E:
        """Validate `expression`, enforcing the max_nodes budget and reporting any
        expression-level error messages through raise_error. Returns the expression."""
        if self.max_nodes > -1:
            self._node_count += 1
            if self._node_count > self.max_nodes:
                self.raise_error(f"Maximum number of AST nodes ({self.max_nodes}) exceeded")
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)
        return expression

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> T | None:
        """Attempt `parse_method`, returning None (and restoring the cursor) on failure.

        The error level is temporarily forced to IMMEDIATE so failures surface as
        ParseError instead of being accumulated.
        """
        index = self._index
        error_level = self.error_level
        this: T | None = None

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def parse(self, raw_tokens: list[Token], sql: str) -> list[exp.Expr | None]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: list[Token],
        sql: str | None = None,
    ) -> list[exp.Expr | None]:
        """
        Parses a list of tokens into a given Expr type. If a collection of Expr
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expr.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(t.cast(type[exp.Expr], expression_type))
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def expression(
        self,
        instance: E,
        token: Token | None = None,
        comments: list[str] | None = None,
    ) -> E:
        """Finalize a freshly built expression node: set its source position from `token`,
        attach comments (explicit ones, or the buffered token comments), and validate it.
        """
        if token:
            instance.update_positions(token)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        if not instance.is_primitive:
            instance = self.validate_expression(instance)
        return instance

    def _parse_batch_statements(
        self,
        parse_method: t.Callable[[Parser], exp.Expr | None],
        sep_first_statement: bool = True,
    ) -> list[exp.Expr | None]:
        """Parse every remaining statement chunk with `parse_method`, returning one
        expression per chunk. Stops early at an ELSE token so block parsers can
        resume; a trailing END becomes an EndStatement node."""
        expressions = []

        # Chunkification binds if/while statements with the first statement of the body
        if sep_first_statement:
            self._match(TokenType.BEGIN)
            expressions.append(parse_method(self))

        chunks_length = len(self._chunks)
        while self._chunk_index < chunks_length:
            self._advance_chunk()

            if self._match(TokenType.ELSE, advance=False):
                return expressions

            if expressions and not self._next and self._match(TokenType.END):
                expressions.append(exp.EndStatement())
                continue

            expressions.append(parse_method(self))

        # Leftover tokens in the final chunk mean the statement did not parse fully
        if self._index < self._tokens_size:
            self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def _parse(
        self,
        parse_method: t.Callable[[Parser], exp.Expr | None],
        raw_tokens: list[Token],
        sql: str | None = None,
    ) -> list[exp.Expr | None]:
        """Split `raw_tokens` into semicolon-delimited chunks and parse each with
        `parse_method`, returning one syntax tree (or None) per statement."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: list[list[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A semicolon carrying comments is kept as its own chunk so the
                # comments aren't lost with the delimiter.
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        self._chunks = chunks

        return self._parse_batch_statements(parse_method=parse_method, sep_first_statement=False)

    def _warn_unsupported(self) -> None:
        """Log a warning that the current chunk is being parsed as an opaque Command."""
        if self._tokens_size <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )
    def _parse_command(self) -> exp.Command:
        """Fall back to parsing the statement as an opaque Command node, warning first."""
        self._warn_unsupported()
        comments = self._prev_comments
        return self.expression(
            exp.Command(this=self._prev.text.upper(), expression=self._parse_string()),
            comments=comments,
        )

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expr:
        """Parse a COMMENT ON <kind> <target> IS '<text>' statement; unknown target
        kinds fall back to an opaque Command."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment(
                this=this,
                kind=kind.text,
                expression=self._parse_string(),
                exists=exists,
                materialized=materialized,
            )
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parse a TO <table> property target."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty(this=table))

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expr:
        """Parse a ClickHouse MergeTree TTL clause, including per-expression actions
        (DELETE / RECOMPRESS / TO DISK / TO VOLUME), WHERE, GROUP BY and SET aggregates."""

        def _parse_ttl_action() -> exp.Expr | None:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction(this=this, delete=True))
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction(this=this, recompress=self._parse_bitwise())
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction(this=this, to_disk=self._parse_string())
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction(this=this, to_volume=self._parse_string())
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL(
                expressions=expressions, where=where, group=group, aggregates=aggregates
            )
        )

    def _parse_condition(self) -> exp.Expr | None:
        """Parse an (optionally parenthesized) boolean condition."""
        return self._parse_wrapped(parse_method=self._parse_expression, optional=True)

    def _parse_block(self) -> exp.Block:
        """Parse a statement block (e.g. a BEGIN..END body) into an exp.Block."""
        return self.expression(
            exp.Block(
                expressions=self._parse_batch_statements(
                    parse_method=lambda self: self._parse_statement()
                )
            )
        )

    def _parse_whileblock(self) -> exp.WhileBlock:
        """Parse a WHILE <condition> <block> construct."""
        return self.expression(
            exp.WhileBlock(this=self._parse_condition(), body=self._parse_block())
        )

    def _parse_statement(self) -> exp.Expr | None:
        """Parse a single statement: dispatch on STATEMENT_PARSERS / COMMANDS / WHILE,
        otherwise parse an expression or a SELECT, then apply query modifiers."""
        if not self._curr:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
            return self._parse_command()

        if self._match_text_seq("WHILE"):
            return self._parse_whileblock()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()

        # A subquery followed by |> starts a BigQuery-style pipe syntax query
        if isinstance(expression, exp.Subquery) and self._match(TokenType.PIPE_GT, advance=False):
            expression = self._parse_pipe_syntax_query(expression)

        return self._parse_query_modifiers(expression)
    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parse a DROP statement; unknown creatable kinds fall back to a Command."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")
        iceberg = self._match_text_seq("ICEBERG")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        # ICEBERG is only valid for DROP ICEBERG TABLE; anything else is a command
        if not kind or (iceberg and kind and kind != "TABLE"):
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(schema=True, is_db_reference=kind == "SCHEMA")

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        cascade_or_restrict = self._match_texts(("CASCADE", "RESTRICT")) and self._prev.text.upper()

        return self.expression(
            exp.Drop(
                exists=if_exists,
                this=this,
                expressions=expressions,
                kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
                temporary=temporary,
                materialized=materialized,
                cascade=cascade_or_restrict == "CASCADE",
                restrict=cascade_or_restrict == "RESTRICT",
                constraints=self._match_text_seq("CONSTRAINTS"),
                purge=self._match_text_seq("PURGE"),
                cluster=cluster,
                concurrently=concurrently,
                sync=self._match_text_seq("SYNC"),
                iceberg=iceberg,
            )
        )

    def _parse_exists(self, not_: bool = False) -> bool | None:
        """Consume IF [NOT] EXISTS, returning True when it was fully matched."""
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse a CREATE [OR REPLACE | OR REFRESH] statement for any creatable kind
        (function/procedure, index, trigger, DB objects, ...). Any shape this parser
        can't fully understand falls back to an opaque Command via _parse_as_command.
        """
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        # CREATE TABLE FUNCTION (BigQuery): skip the TABLE keyword
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        create_token_type = t.cast(Token, create_token).token_type

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: exp.Expr | None = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: exp.Properties | None) -> None:
            # Accumulate property expressions found at the various property locations
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._parse_heredoc() if self._match(TokenType.ALIAS) else None
            extend_props(self._parse_function_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = (
                            self._parse_user_defined_function_expression()
                            if create_token_type == TokenType.FUNCTION
                            else self._parse_block()
                        )

                    if return_:
                        expression = self.expression(exp.Return(this=expression))
        elif create_token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif (
            create_token_type == TokenType.CONSTRAINT and self._match(TokenType.TRIGGER)
        ) or create_token_type == TokenType.TRIGGER:
            if is_constraint := (create_token_type == TokenType.CONSTRAINT):
                create_token = self._prev

            trigger_name = self._parse_id_var()
            if not trigger_name:
                return self._parse_as_command(start)

            timing_var = self._parse_var_from_options(self.TRIGGER_TIMING, raise_unmatched=False)
            timing = timing_var.this if timing_var else None
            if not timing:
                return self._parse_as_command(start)

            events = self._parse_trigger_events()
            if not self._match(TokenType.ON):
                self.raise_error("Expected ON in trigger definition")

            table = self._parse_table_parts()
            referenced_table = self._parse_table_parts() if self._match(TokenType.FROM) else None
            deferrable, initially = self._parse_trigger_deferrable()
            referencing = self._parse_trigger_referencing()
            for_each = self._parse_trigger_for_each()
            when = self._match_text_seq("WHEN") and self._parse_wrapped(
                self._parse_disjunction, optional=True
            )
            execute = self._parse_trigger_execute()

            if execute is None:
                return self._parse_as_command(start)

            trigger_props = self.expression(
                exp.TriggerProperties(
                    table=table,
                    timing=timing,
                    events=events,
                    execute=execute,
                    constraint=is_constraint,
                    referenced_table=referenced_table,
                    deferrable=deferrable,
                    initially=initially,
                    referencing=referencing,
                    for_each=for_each,
                    when=when,
                )
            )

            this = trigger_name
            extend_props(exp.Properties(expressions=[trigger_props] if trigger_props else []))
        elif create_token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                props = self._parse_properties()
                if props:
                    # Merge all SequenceProperties found in the property list into one node
                    sequence_props = exp.SequenceProperties()
                    options = []
                    for prop in props:
                        if isinstance(prop, exp.SequenceProperties):
                            for arg, value in prop.args.items():
                                if arg == "options":
                                    options.extend(value)
                                else:
                                    sequence_props.set(arg, value)
                            prop.pop()

                    if options:
                        sequence_props.set("options", options)

                    props.append("expressions", sequence_props)
                    extend_props(props)
            else:
                expression = self._parse_ddl_select()

            # Some dialects also support using a table as an alias instead of a SELECT.
            # Here we fallback to this as an alternative.
            if not expression and has_alias:
                expression = self._try_parse(self._parse_table_parts)

            if create_token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone(this=self._parse_table(schema=True), shallow=shallow, copy=copy)
            )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create(
                this=this,
                kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
                replace=replace,
                refresh=refresh,
                unique=unique,
                expression=expression,
                exists=exists,
                properties=properties,
                indexes=indexes,
                no_schema_binding=no_schema_binding,
                begin=begin,
                clone=clone,
                concurrently=concurrently,
                clustered=clustered,
            )
        )

    def _parse_sequence_properties(self) -> exp.SequenceProperties | None:
        """Parse CREATE SEQUENCE options (INCREMENT BY, MINVALUE, ..., OWNED BY);
        returns None if nothing was consumed."""
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq
    def _parse_trigger_events(self) -> list[exp.TriggerEvent]:
        """Parse the OR-separated trigger event list (INSERT, UPDATE [OF cols], ...)."""
        events = []

        while True:
            event_type = self._match_set(self.TRIGGER_EVENTS) and self._prev.text.upper()

            if not event_type:
                self.raise_error("Expected trigger event (INSERT, UPDATE, DELETE, TRUNCATE)")

            # UPDATE may name the specific columns that fire the trigger
            columns = (
                self._parse_csv(self._parse_column)
                if event_type == "UPDATE" and self._match_text_seq("OF")
                else None
            )

            events.append(self.expression(exp.TriggerEvent(this=event_type, columns=columns)))

            if not self._match(TokenType.OR):
                break

        return events

    def _parse_trigger_deferrable(
        self,
    ) -> tuple[str | None, str | None]:
        """Parse the optional [NOT] DEFERRABLE [INITIALLY IMMEDIATE|DEFERRED] clause,
        returning (deferrable, initially) as upper-cased strings or None."""
        deferrable_var = self._parse_var_from_options(
            self.TRIGGER_DEFERRABLE, raise_unmatched=False
        )
        deferrable = deferrable_var.this if deferrable_var else None

        initially = None
        if deferrable and self._match_text_seq("INITIALLY"):
            initially = (
                self._prev.text.upper() if self._match_texts(("IMMEDIATE", "DEFERRED")) else None
            )

        return deferrable, initially

    def _parse_trigger_referencing_clause(self, keyword: str) -> exp.Expr | None:
        """Parse one `<keyword> TABLE [AS] <alias>` piece of a REFERENCING clause."""
        if not self._match_text_seq(keyword):
            return None
        if not self._match_text_seq("TABLE"):
            self.raise_error(f"Expected TABLE after {keyword} in REFERENCING clause")
        self._match_text_seq("AS")
        return self._parse_id_var()

    def _parse_trigger_referencing(self) -> exp.TriggerReferencing | None:
        """Parse a REFERENCING OLD/NEW TABLE alias clause; None if absent."""
        if not self._match_text_seq("REFERENCING"):
            return None

        old_alias = None
        new_alias = None

        while True:
            if alias := self._parse_trigger_referencing_clause("OLD"):
                if old_alias is not None:
                    self.raise_error("Duplicate OLD clause in REFERENCING")
                old_alias = alias
            elif alias := self._parse_trigger_referencing_clause("NEW"):
                if new_alias is not None:
                    self.raise_error("Duplicate NEW clause in REFERENCING")
                new_alias = alias
            else:
                break

        if old_alias is None and new_alias is None:
            self.raise_error("REFERENCING clause requires at least OLD TABLE or NEW TABLE")

        return self.expression(exp.TriggerReferencing(old=old_alias, new=new_alias))

    def _parse_trigger_for_each(self) -> str | None:
        """Parse FOR EACH ROW|STATEMENT, returning the granularity or None."""
        if not self._match_text_seq("FOR", "EACH"):
            return None

        return self._prev.text.upper() if self._match_texts(("ROW", "STATEMENT")) else None

    def _parse_trigger_execute(self) -> exp.TriggerExecute | None:
        """Parse EXECUTE FUNCTION|PROCEDURE <call>; None if EXECUTE is absent."""
        if not self._match(TokenType.EXECUTE):
            return None

        if not self._match_set((TokenType.FUNCTION, TokenType.PROCEDURE)):
            self.raise_error("Expected FUNCTION or PROCEDURE after EXECUTE")

        func_call = self._parse_column()
        return self.expression(exp.TriggerExecute(this=func_call))

    def _parse_property_before(self) -> exp.Expr | list[exp.Expr] | None:
        """Parse a property that appears before the object name (Teradata-style),
        collecting modifier keywords (NO, DUAL, BEFORE, ...) as parser kwargs."""
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifiers that were actually matched
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None
    def _parse_wrapped_properties(self) -> list[exp.Expr | list[exp.Expr]]:
        """Parse a parenthesized, comma-separated property list."""
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> exp.Expr | list[exp.Expr] | None:
        """Parse a single property: a registered PROPERTY_PARSERS entry, a special
        form (COMPOUND SORTKEY, sequence options, ...) or a generic `key = value`."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("PARAMETER", "STYLE", "PANDAS"):
            return self.expression(exp.ParameterStyleProperty(this="PANDAS"))

        index = self._index

        seq_props = self._parse_sequence_properties()
        if seq_props:
            return seq_props

        self._retreat(index)
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property(this=key, value=value))

    def _parse_stored(self) -> exp.FileFormatProperty | exp.StorageHandlerProperty:
        """Parse a STORED [AS <format> | BY <handler>] property (Hive-style)."""
        if self._match_text_seq("BY"):
            return self.expression(exp.StorageHandlerProperty(this=self._parse_var_or_string()))

        self._match(TokenType.ALIAS)
        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty(
                this=(
                    self.expression(
                        exp.InputOutputFormat(
                            input_format=input_format, output_format=output_format
                        )
                    )
                    if input_format or output_format
                    else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
                ),
                hive_format=True,
            )
        )

    def _parse_unquoted_field(self) -> exp.Expr | None:
        """Parse a field, normalizing an unquoted identifier to an exp.Var."""
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: type[E], **kwargs: t.Any) -> E:
        """Parse `[=|AS] <value>` and wrap the value in `exp_class(**kwargs)`."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class(this=self._parse_unquoted_field(), **kwargs))

    def _parse_properties(self, before: bool | None = None) -> exp.Properties | None:
        """Parse consecutive properties into an exp.Properties node; None when empty.

        Args:
            before: Use the pre-name (Teradata-style) property grammar when truthy.
        """
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties(expressions=properties))

        return None
            return self.expression(exp.Properties(expressions=properties))

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parse Teradata [NO] FALLBACK [PROTECTION]."""
        return self.expression(
            exp.FallbackProperty(no=no, protection=self._match_text_seq("PROTECTION"))
        )

    def _parse_sql_security(self) -> exp.SqlSecurityProperty:
        """Parse SQL SECURITY <keyword> into a SqlSecurityProperty."""
        return self.expression(
            exp.SqlSecurityProperty(
                this=self._match_texts(self.SECURITY_PROPERTY_KEYWORDS) and self._prev.text.upper()
            )
        )

    def _parse_settings_property(self) -> exp.SettingsProperty:
        """Parse a comma-separated list of SETTINGS assignments."""
        return self.expression(
            exp.SettingsProperty(expressions=self._parse_csv(self._parse_assignment))
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguate VOLATILE: a table modifier vs. a stability marker."""
        # Peek two tokens back: if the token before VOLATILE is one of
        # PRE_VOLATILE_TOKENS, this is e.g. CREATE VOLATILE TABLE.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty(this=exp.Literal.string("VOLATILE")))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        """Parse TSQL SYSTEM_VERSIONING = ON/OFF with its optional option list."""
        self._match(TokenType.EQ)
        prop = self.expression(exp.WithSystemVersioningProperty(on=True, with_=with_))

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        """Parse DATA_DELETION = ON/OFF (FILTER_COLUMN = ..., RETENTION_PERIOD = ...)."""
        self._match(TokenType.EQ)
        # Absent both ON and OFF, default to on.
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty(on=on))

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        """Parse DISTRIBUTED BY HASH(...)/RANDOM [BUCKETS n] [ORDER BY ...]."""
        kind = "HASH"
        expressions: list[exp.Expr] | None = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: exp.Expr | None = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty(
                expressions=expressions, kind=kind, buckets=buckets, order=self._parse_order()
            )
        )

    def _parse_composite_key_property(self, expr_type: type[E]) -> E:
        """Parse `... KEY (<id>, ...)` into an instance of *expr_type*."""
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type(expressions=expressions))

    def _parse_with_property(self) -> exp.Expr | None | list[exp.Expr]:
        """Parse the many WITH <...> property forms; may yield one node or a list."""
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            result: list[exp.Expr] = []
            for i in self._parse_wrapped_properties():
                # A wrapped property may itself be a list; flatten as we go.
                result.extend(i) if isinstance(i, list) else result.append(i)
            return result

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty(this=self._prev.text.upper()))

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty(
                    this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS)
                )
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions(expressions=self._parse_csv(self._parse_procedure_option))
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expr | None:
        """Parse a single procedure option, e.g. EXECUTE AS <option or string>."""
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty(
                    this=self._parse_var_from_options(
                        self.EXECUTE_AS_OPTIONS, raise_unmatched=False
                    )
                    or self._parse_string()
                )
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> exp.DefinerProperty | None:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parse Teradata WITH JOURNAL TABLE = <table>."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty(this=self._parse_table_parts()))

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Parse [NO] LOG."""
        return self.expression(exp.LogProperty(no=no))

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Wrap pre-matched journal modifier flags into a JournalProperty."""
        return self.expression(exp.JournalProperty(**kwargs))

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse CHECKSUM = ON/OFF/DEFAULT."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty(on=on, default=self._match(TokenType.DEFAULT)))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        """Parse a CLUSTER BY list; *wrapped* means the list is parenthesized."""
        return self.expression(
            exp.Cluster(
                expressions=(
                    self._parse_wrapped_csv(self._parse_ordered)
                    if wrapped
                    else self._parse_csv(self._parse_ordered)
                )
            )
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse Hive CLUSTERED BY (...) [SORTED BY (...)] INTO n BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty(expressions=expressions, sorted_by=sorted_by, buckets=buckets)
        )

    def _parse_copy_property(self) -> exp.CopyGrantsProperty | None:
        """Parse COPY GRANTS; back out if GRANTS doesn't follow COPY."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty())

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parse FREESPACE = <number> [PERCENT]."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty(this=self._parse_number(), percent=self._match(TokenType.PERCENT))
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse MERGEBLOCKRATIO, either `= n [PERCENT]` or bare with modifiers."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty(
                    this=self._parse_number(), percent=self._match(TokenType.PERCENT)
                )
            )

        return self.expression(exp.MergeBlockRatioProperty(no=no, default=default))

    def _parse_datablocksize(
        self,
        default: bool | None = None,
        minimum: bool | None = None,
        maximum: bool | None = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse DATABLOCKSIZE = <size> [BYTES|KBYTES|KILOBYTES]."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty(
                size=size, units=units, default=default, minimum=minimum, maximum=maximum
            )
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION = ALWAYS/MANUAL/NEVER/DEFAULT [AUTOTEMP(...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty(
                always=always, manual=manual, never=never, default=default, autotemp=autotemp
            )
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty | None:
        """Parse [NO] [CONCURRENT] ISOLATED LOADING [<target>]; None if absent."""
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty(no=no, concurrent=concurrent, target=target)
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse Teradata LOCKING <kind> <target> FOR/IN <lock type> [OVERRIDE]."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects (not ROW) take a table reference.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty(
                this=this, kind=kind, for_or_in=for_or_in, lock_type=lock_type, override=override
            )
        )

    def _parse_partition_by(self) -> list[exp.Expr]:
        """Parse a PARTITION BY expression list; empty list if absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_disjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> exp.Expr | None:
            # MINVALUE/MAXVALUE are open-ended bound markers, not expressions.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: exp.Expr | list[exp.Expr] | None = None
        expression = None
        from_expressions = None
        to_expressions = None

        # The three PostgreSQL bound forms: IN (...), FROM (...) TO (...),
        # and WITH (MODULUS n, REMAINDER m).
        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec(
                this=this,
                expression=expression,
                from_expressions=from_expressions,
                to_expressions=to_expressions,
            )
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> exp.PartitionedOfProperty | None:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty(this=this, expression=expression))

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse PARTITIONED BY [=] <schema or expression>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty(
                this=self._parse_schema() or self._parse_bracket(self._parse_field())
            )
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse Teradata WITH [NO] DATA [AND [NO] STATISTICS]."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty(no=no, statistics=statistics))

    def _parse_contains_property(self) -> exp.SqlReadWriteProperty | None:
        """Parse CONTAINS SQL (the CONTAINS keyword was already consumed)."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty(this="CONTAINS SQL"))
        return None

    def _parse_modifies_property(self) -> exp.SqlReadWriteProperty | None:
        """Parse MODIFIES SQL DATA (the MODIFIES keyword was already consumed)."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty(this="MODIFIES SQL DATA"))
        return None

    def _parse_no_property(self) -> exp.Expr | None:
        """Parse the NO-prefixed properties: NO PRIMARY INDEX, NO SQL."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty(this="NO SQL"))
        return None

    def _parse_on_property(self) -> exp.Expr | None:
        """Parse ON COMMIT PRESERVE/DELETE ROWS, else a generic ON property."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty(this=self._parse_schema(self._parse_id_var())))

    def _parse_reads_property(self) -> exp.SqlReadWriteProperty | None:
        """Parse READS SQL DATA (the READS keyword was already consumed)."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty(this="READS SQL DATA"))
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse DISTKEY(<id>)."""
        return self.expression(exp.DistKeyProperty(this=self._parse_wrapped(self._parse_id_var)))

    def _parse_create_like(self) -> exp.LikeProperty | None:
        """Parse CREATE ... LIKE <table> [INCLUDING/EXCLUDING <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property(this=this, value=exp.var(id_var.this.upper())))
            )

        return self.expression(exp.LikeProperty(this=table, expressions=options))

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse [COMPOUND] SORTKEY (<id>, ...)."""
        return self.expression(
            exp.SortKeyProperty(this=self._parse_wrapped_id_vars(), compound=compound)
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse [DEFAULT] CHARACTER SET [=] <value>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty(this=self._parse_var_or_string(), default=default)
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse REMOTE WITH CONNECTION <table parts> (BigQuery remote models)."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty(this=self._parse_table_parts())
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: TABLE<...>, TABLE (...), NULL ON NULL INPUT, or a type."""
        value: exp.Expr | None
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema(this="TABLE", expressions=self._parse_csv(self._parse_struct_types))
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty(this=value, is_table=is_table, null=null))

    def _parse_describe(self) -> exp.Describe:
        kind = self._prev.text if self._match_set(self.CREATABLES) else None
        style: str | None = (
            self._prev.text.upper() if self._match_texts(self.DESCRIBE_STYLES) else None
        )
        # A dot after the style keyword means it was actually part of a table
        # name (e.g. DESCRIBE extended.t), so undo the style match.
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe(
                this=this,
                style=style,
                kind=kind,
                expressions=expressions,
                partition=partition,
                format=format,
                as_json=self._match_text_seq("AS", "JSON"),
            )
        )

    def _parse_multitable_inserts(self, comments: list[str] | None) -> exp.MultitableInserts:
        """Parse Oracle-style INSERT ALL/FIRST ... with conditional INTO branches."""
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> exp.ConditionalInsert | None:
            # Each branch is [WHEN <cond> THEN | ELSE] INTO <table> [VALUES ...].
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert(
                    this=self.expression(
                        exp.Insert(
                            this=self._parse_table(schema=True),
                            expression=self._parse_derived_table_values(),
                        )
                    ),
                    expression=expression,
                    else_=else_,
                )
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts(kind=kind, expressions=expressions, source=self._parse_table()),
            comments=comments,
        )

    def _parse_insert(self) -> exp.Insert | exp.MultitableInserts:
        """Parse an INSERT statement (the INSERT keyword was already consumed)."""
        comments: list[str] = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # Hive INSERT [OVERWRITE] [LOCAL] DIRECTORY '<path>' ...
            this: exp.Expr | None = self.expression(
                exp.Directory(
                    this=self._parse_var_or_string(),
                    local=local,
                    row_format=self._parse_row_format(match_row=True),
                )
            )
        else:
            # INSERT FIRST/ALL dispatches to multi-table insert parsing.
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = self._parse_function() if is_function else self._parse_insert_table()

        returning = self._parse_returning()  # TSQL allows RETURNING before source

        return self.expression(
            exp.Insert(
                hint=hint,
                is_function=is_function,
                this=this,
                stored=self._match_text_seq("STORED") and self._parse_stored(),
                by_name=self._match_text_seq("BY", "NAME"),
                exists=self._parse_exists(),
                where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
                and self._parse_disjunction(),
                partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
                settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
                default=self._match_text_seq("DEFAULT", "VALUES"),
                expression=self._parse_derived_table_values() or self._parse_ddl_select(),
                conflict=self._parse_on_conflict(),
                returning=returning or self._parse_returning(),
                overwrite=overwrite,
                alternative=alternative,
                ignore=ignore,
                source=self._match(TokenType.TABLE) and self._parse_table(),
            ),
            comments=comments,
        )
    def _parse_insert_table(self) -> exp.Expr | None:
        """Parse the target table of an INSERT, including an optional alias."""
        this = self._parse_table(schema=True, parse_partition=True)
        if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
            this.set("alias", self._parse_table_alias())
        return this

    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION | QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(exp.Kill(this=self._parse_primary(), kind=kind))

    def _parse_on_conflict(self) -> exp.OnConflict | None:
        """Parse ON CONFLICT (Postgres-style) or ON DUPLICATE KEY (MySQL-style)."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        index_predicate = self._parse_where()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict(
                duplicate=duplicate,
                expressions=expressions,
                action=action,
                conflict_keys=conflict_keys,
                index_predicate=index_predicate,
                constraint=constraint,
                where=self._parse_where(),
            )
        )

    def _parse_returning(self) -> exp.Returning | None:
        """Parse RETURNING <exprs> [INTO <target>]; None if RETURNING is absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning(
                expressions=self._parse_csv(self._parse_expression),
                into=self._match(TokenType.INTO) and self._parse_table_part(),
            )
        )

    def _parse_row(self) -> exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty | None:
        """Parse ROW FORMAT ... after the ROW keyword has been consumed."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> exp.SerdeProperties | None:
        """Parse [WITH] SERDEPROPERTIES (...); None and rewind if absent."""
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties(expressions=self._parse_wrapped_properties(), with_=with_)
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty | None:
        """Parse Hive ROW FORMAT SERDE '<class>' or ROW FORMAT DELIMITED ...."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty(this=this, serde_properties=serde_properties)
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty(**kwargs))  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive LOAD DATA ...; fall back to a raw command otherwise."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData(
                    this=self._parse_table(schema=True),
                    local=local,
                    overwrite=overwrite,
                    inpath=inpath,
                    files=self._match_text_seq("FROM", "FILES")
                    and exp.Properties(expressions=self._parse_wrapped_properties()),
                    partition=self._parse_partition(),
                    input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                    serde=self._match_text_seq("SERDE") and self._parse_string(),
                )
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement (the DELETE keyword was already consumed)."""
        hint = self._parse_hint()

        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete(
                hint=hint,
                tables=tables,
                this=self._match(TokenType.FROM) and self._parse_table(joins=True),
                using=self._match(TokenType.USING)
                and self._parse_csv(lambda: self._parse_table(joins=True)),
                cluster=self._match(TokenType.ON) and self._parse_on_property(),
                where=self._parse_where(),
                returning=returning or self._parse_returning(),
                order=self._parse_order(),
                limit=self._parse_limit(),
            )
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement, accepting its clauses in any order."""
        hint = self._parse_hint()
        kwargs: dict[str, object] = {
            "hint": hint,
            "this": self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS),
        }
        while self._curr:
            if self._match(TokenType.SET):
                kwargs["expressions"] = self._parse_csv(self._parse_equality)
            elif self._match(TokenType.RETURNING, advance=False):
                kwargs["returning"] = self._parse_returning()
            elif self._match(TokenType.FROM, advance=False):
                from_ = self._parse_from(joins=True)
                table = from_.this if from_ else None
                # A subquery source may be followed by joins that belong to it.
                if isinstance(table, exp.Subquery) and self._match(TokenType.JOIN, advance=False):
                    table.set("joins", list(self._parse_joins()) or None)

                kwargs["from_"] = from_
            elif self._match(TokenType.WHERE, advance=False):
                kwargs["where"] = self._parse_where()
            elif self._match(TokenType.ORDER_BY, advance=False):
                kwargs["order"] = self._parse_order()
            elif self._match(TokenType.LIMIT, advance=False):
                kwargs["limit"] = self._parse_limit()
            else:
                break

        return self.expression(exp.Update(**kwargs))

    def _parse_use(self) -> exp.Use:
        """Parse USE [<kind>] <target>."""
        return self.expression(
            exp.Use(
                kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
                this=self._parse_table(schema=False),
            )
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache(exists=self._parse_exists(), this=self._parse_table(schema=True))
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache(
                this=table, lazy=lazy, options=options, expression=self._parse_select(nested=True)
            )
        )

    def _parse_partition(self) -> exp.Partition | None:
        """Parse a PARTITION/SUBPARTITION (...) clause; None if absent."""
        if not self._match_texts(self.PARTITION_KEYWORDS):
            return None

        return self.expression(
            exp.Partition(
                subpartition=self._prev.text.upper() == "SUBPARTITION",
                expressions=self._parse_wrapped_csv(self._parse_disjunction),
            )
        )

    def _parse_value(self, values: bool = True) -> exp.Tuple | None:
        """Parse one VALUES row — a parenthesized tuple or a bare expression."""
        def _parse_value_expression() -> exp.Expr | None:
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple(expressions=expressions))

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple(expressions=[expression]))
        return None

    def _parse_projections(
        self,
    ) -> tuple[list[exp.Expr], list[exp.Expr] | None]:
        """Parse the SELECT list; the second element is an EXCLUDE list (unused here)."""
        return self._parse_expressions(), None

    def _parse_wrapped_select(self, table: bool = False) -> exp.Expr | None:
        """Parse the contents of a parenthesized SELECT/PIVOT/FROM-first query."""
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: exp.Expr | None = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True, consume_pipe=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select(from_=from_)
            if select:
                if not select.args.get("from_"):
                    select.set("from_", from_)
                this = select
            else:
                this = exp.select("*").from_(t.cast(exp.From, from_))
            this = self._parse_query_modifiers(self._parse_set_operations(this))
        else:
            this = (
                self._parse_table(consume_pipe=True)
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

            # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
            # in case a modifier (e.g. join) is following
            if table and isinstance(this, exp.Values) and this.alias:
                alias = this.args["alias"].pop()
                this = exp.Table(this=this, alias=alias)

            this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
        consume_pipe: bool = True,
        from_: exp.From | None = None,
    ) -> exp.Expr | None:
        """Parse a SELECT query, optionally continuing into pipe (|>) syntax."""
        query = self._parse_select_query(
            nested=nested,
            table=table,
            parse_subquery_alias=parse_subquery_alias,
            parse_set_operation=parse_set_operation,
        )

        if consume_pipe and self._match(TokenType.PIPE_GT, advance=False):
            if not query and from_:
                query = exp.select("*").from_(from_)
            if isinstance(query, exp.Query):
                query = self._parse_pipe_syntax_query(query)
                query = query.subquery(copy=False) if query and table else query

        return query

    def _parse_select_query(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> exp.Expr | None:
        """Parse the core of a SELECT query (CTEs, projections, modifiers)."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            # Unwrap redundant parentheses before attaching the WITH clause.
            while isinstance(this, exp.Subquery) and this.is_wrapper:
                this = this.this

            assert this is not None
            if "with_" in this.arg_types:
                this.set("with_", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = (
            self._parse_from(joins=True, consume_pipe=True)
            if self._match(TokenType.FROM, advance=False)
            else None
        )

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            # A following DOT means ALL/DISTINCT would actually be an identifier.
            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                matched_distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, matched_distinct = None, False

            kind = (
                self._prev.text.upper()
                if self._match(TokenType.ALIAS) and self._match_texts(("STRUCT", "VALUE"))
                else None
            )

            distinct: exp.Expr | None = (
                self.expression(
                    exp.Distinct(
                        on=self._parse_value(values=False) if self._match(TokenType.ON) else None
                    )
                )
                if matched_distinct
                else None
            )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections, exclude = self._parse_projections()

            this = self.expression(
                exp.Select(
                    kind=kind,
                    hint=hint,
                    distinct=distinct,
                    expressions=projections,
                    limit=limit,
                    exclude=exclude,
                    operation_modifiers=operation_modifiers or None,
                )
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from_", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            this = self._parse_wrapped_select(table=table)

            if this:
                this.add_comments(comments, prepend=True)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
self._match_r_paren() 3871 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3872 elif self._match(TokenType.VALUES, advance=False): 3873 this = self._parse_derived_table_values() 3874 elif from_: 3875 this = exp.select("*").from_(from_.this, copy=False) 3876 this = self._parse_query_modifiers(this) 3877 elif self._match(TokenType.SUMMARIZE): 3878 table = self._match(TokenType.TABLE) 3879 this = self._parse_select() or self._parse_string() or self._parse_table() 3880 return self.expression(exp.Summarize(this=this, table=table)) 3881 elif self._match(TokenType.DESCRIBE): 3882 this = self._parse_describe() 3883 else: 3884 this = None 3885 3886 return self._parse_set_operations(this) if parse_set_operation else this 3887 3888 def _parse_recursive_with_search(self) -> exp.RecursiveWithSearch | None: 3889 self._match_text_seq("SEARCH") 3890 3891 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3892 3893 if not kind: 3894 return None 3895 3896 self._match_text_seq("FIRST", "BY") 3897 3898 return self.expression( 3899 exp.RecursiveWithSearch( 3900 kind=kind, 3901 this=self._parse_id_var(), 3902 expression=self._match_text_seq("SET") and self._parse_id_var(), 3903 using=self._match_text_seq("USING") and self._parse_id_var(), 3904 ) 3905 ) 3906 3907 def _parse_with(self, skip_with_token: bool = False) -> exp.With | None: 3908 if not skip_with_token and not self._match(TokenType.WITH): 3909 return None 3910 3911 comments = self._prev_comments 3912 recursive = self._match(TokenType.RECURSIVE) 3913 3914 last_comments = None 3915 expressions = [] 3916 while True: 3917 cte = self._parse_cte() 3918 if isinstance(cte, exp.CTE): 3919 expressions.append(cte) 3920 if last_comments: 3921 cte.add_comments(last_comments) 3922 3923 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3924 break 3925 else: 3926 self._match(TokenType.WITH) 3927 3928 last_comments = self._prev_comments 3929 3930 return 
self.expression( 3931 exp.With( 3932 expressions=expressions, 3933 recursive=recursive or None, 3934 search=self._parse_recursive_with_search(), 3935 ), 3936 comments=comments, 3937 ) 3938 3939 def _parse_cte(self) -> exp.CTE | None: 3940 index = self._index 3941 3942 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3943 if not alias or not alias.this: 3944 self.raise_error("Expected CTE to have alias") 3945 3946 key_expressions = ( 3947 self._parse_wrapped_id_vars() if self._match_text_seq("USING", "KEY") else None 3948 ) 3949 3950 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3951 self._retreat(index) 3952 return None 3953 3954 comments = self._prev_comments 3955 3956 if self._match_text_seq("NOT", "MATERIALIZED"): 3957 materialized = False 3958 elif self._match_text_seq("MATERIALIZED"): 3959 materialized = True 3960 else: 3961 materialized = None 3962 3963 cte = self.expression( 3964 exp.CTE( 3965 this=self._parse_wrapped(self._parse_statement), 3966 alias=alias, 3967 materialized=materialized, 3968 key_expressions=key_expressions, 3969 ), 3970 comments=comments, 3971 ) 3972 3973 values = cte.this 3974 if isinstance(values, exp.Values): 3975 if values.alias: 3976 cte.set("this", exp.select("*").from_(values)) 3977 else: 3978 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3979 3980 return cte 3981 3982 def _parse_table_alias( 3983 self, alias_tokens: t.Collection[TokenType] | None = None 3984 ) -> exp.TableAlias | None: 3985 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3986 # so this section tries to parse the clause version and if it fails, it treats the token 3987 # as an identifier (alias) 3988 if self._can_parse_limit_or_offset(): 3989 return None 3990 3991 any_token = self._match(TokenType.ALIAS) 3992 alias = ( 3993 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3994 or self._parse_string_as_identifier() 3995 
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            # Optional column list after the alias, e.g. `t(a, b)`
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias(this=alias, columns=columns))

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: exp.Expr | None, parse_alias: bool = True
    ) -> exp.Subquery | None:
        """Wrap ``this`` in a Subquery, consuming trailing pivots, alias and sample."""
        if not this:
            return None

        return self.expression(
            exp.Subquery(
                this=this,
                pivots=self._parse_pivots(),
                alias=self._parse_table_alias() if parse_alias else None,
                sample=self._parse_table_sample(),
            )
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite comma-joined references to prior tables as explicit UNNESTs
        (e.g. BigQuery's ``FROM t, t.arr`` becomes ``FROM t, UNNEST(t.arr)``)."""
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from_"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                # Only tables whose first part refers to a previously-seen
                # relation are treated as implicit unnests
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    @t.overload
    def _parse_query_modifiers(self, this: E) -> E: ...

    @t.overload
    def _parse_query_modifiers(self, this: None) -> None: ...

    def _parse_query_modifiers(self, this):
        """Attach trailing clauses (joins, laterals, WHERE/GROUP/ORDER/LIMIT, ...)
        to a modifiable expression; returns ``this`` unchanged otherwise."""
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    modifier_token = self._curr
                    parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type]
                    key, expression = parser(self)

                    if expression:
                        if this.args.get(key):
                            self.raise_error(
                                f"Found multiple '{modifier_token.text.upper()}' clauses",
                                token=modifier_token,
                            )

                        this.set(key, expression)
                        if key == "limit":
                            # Split a combined LIMIT ... OFFSET ... into two nodes
                            offset = expression.args.get("offset")
                            expression.set("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                # Move LIMIT BY expressions onto the Offset node
                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from_"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> exp.Hint | None:
        # Consume everything that's left and keep it as a raw SQL string hint
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> exp.Expr | None:
        return self._parse_function_call()

    def _parse_hint_body(self) -> exp.Hint | None:
        """Parse the contents of a hint comment; fall back to a raw string on failure."""
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint(expressions=hints))

    def _parse_hint(self) -> exp.Hint | None:
        # Hints are carried as comments on the HINT token
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> exp.Into | None:
        """Parse SELECT ... INTO [TEMPORARY|UNLOGGED] [TABLE] target."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into(this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged)
        )

    def _parse_from(
        self,
        joins: bool = False,
        skip_from_token: bool = False,
        consume_pipe: bool = False,
    ) -> exp.From | None:
        """Parse a FROM clause; ``skip_from_token`` when FROM was already consumed."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        comments = self._prev_comments
        return self.expression(
            exp.From(this=self._parse_table(joins=joins, consume_pipe=consume_pipe)),
            comments=comments,
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure(
                window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
                this=self._parse_expression(),
            )
        )

    def _parse_match_recognize(self) -> exp.MatchRecognize | None:
        """Parse a MATCH_RECOGNIZE(...) clause (partitioning, measures, pattern, defines)."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is kept verbatim; track paren depth to find its end
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize(
                partition_by=partition,
                order=order,
                measures=measures,
                rows=rows,
                after=after,
                pattern=pattern,
                define=define,
                alias=self._parse_table_alias(),
            )
        )

    def _parse_lateral(self) -> exp.Lateral | None:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY and its aliasing variants."""
        cross_apply: bool | None = None
        if self._match_pair(TokenType.CROSS, TokenType.APPLY):
            cross_apply = True
        elif self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: unnest, function call, or dotted identifier chain
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: bool | None = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: exp.TableAlias | None = self.expression(
                exp.TableAlias(this=table, columns=columns)
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral(
                this=this,
                view=view,
                outer=outer,
                alias=table_alias,
                cross_apply=cross_apply,
                ordinality=ordinality,
            )
        )
    def _parse_stream(self) -> exp.Stream | None:
        """Parse a STREAM(table) reference, retreating if no table follows."""
        index = self._index
        if self._match(TokenType.STREAM):
            if this := self._try_parse(self._parse_table):
                return self.expression(exp.Stream(this=this))
            self._retreat(index)
        return None

    def _parse_join_parts(
        self,
    ) -> tuple[Token | None, Token | None, Token | None]:
        # (method, side, kind), e.g. (HASH, LEFT, OUTER)
        return (
            self._prev if self._match_set(self.JOIN_METHODS) else None,
            self._prev if self._match_set(self.JOIN_SIDES) else None,
            self._prev if self._match_set(self.JOIN_KINDS) else None,
        )

    def _parse_using_identifiers(self) -> list[exp.Expr]:
        """Parse the identifier list of a USING (...) clause."""

        def _parse_column_as_identifier() -> exp.Expr | None:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                # USING takes bare identifiers, so unwrap the Column node
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> exp.Join | None:
        """Parse one join clause (including comma cross-joins and APPLY forms)."""
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            cross_join = self.expression(exp.Join(this=table)) if table else None

            if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
                cross_join.set("kind", "CROSS")

            return cross_join

        index = self._index
        method, side, kind = self._parse_join_parts()
        directed = self._match_text_seq("DIRECTED")
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
        join_comments = self._prev_comments

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text.upper()
        if side:
            kwargs["side"] = side.text.upper()
        if kind:
            kwargs["kind"] = kind.text.upper()
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_disjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not method
            and not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            # Nested joins: consume any chained joins before ON/USING so the
            # condition attaches to the outermost join
            index = self._index
            joins: list | None = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_disjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        comments = (join_comments or []) + comments

        if (
            self.ADD_JOIN_ON_TRUE
            and not kwargs.get("on")
            and not kwargs.get("using")
            and not kwargs.get("method")
            and kwargs.get("kind") in (None, "INNER", "OUTER")
        ):
            kwargs["on"] = exp.true()

        if directed:
            kwargs["directed"] = directed

        return self.expression(exp.Join(**kwargs), comments=comments)

    def _parse_opclass(self) -> exp.Expr | None:
        """Parse an expression optionally followed by a Postgres operator class."""
        this = self._parse_disjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass(this=this, expression=self._parse_table_parts()))

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the parameter tail of a CREATE INDEX statement."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters(
                using=using,
                columns=columns,
                include=include,
                partition_by=partition_by,
                where=where,
                with_storage=with_storage,
                tablespace=tablespace,
                on=on,
            )
        )

    def _parse_index(
        self, index: exp.Expr | None = None, anonymous: bool = False
    ) -> exp.Index | None:
        """Parse an index definition; ``index``/``anonymous`` mean the name was
        already consumed (or there is none), so only the target table follows."""
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index(
                this=index, table=table, unique=unique, primary=primary, amp=amp, params=params
            )
        )

    def _parse_table_hints(self) -> list[exp.Expr] | None:
        """Parse T-SQL WITH (...) table hints or MySQL index hints."""
        hints: list[exp.Expr] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint(
                        expressions=self._parse_csv(
                            lambda: self._parse_function() or self._parse_var(any_token=True)
                        )
                    )
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> exp.Expr | None:
        # One dotted component of a table name; functions are disallowed in
        # schema position
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts_fast(self) -> exp.Table | None:
        """Fast path for plain dotted table names; returns None (after retreating)
        whenever anything non-trivial follows, so the slow path can take over.

        NOTE(review): ``self._curr.token_type`` assumes a token always follows an
        identifier here — confirm the token stream guarantees a trailing token.
        """
        index = self._index
        parts: list[exp.Identifier] | None = None
        all_comments: list[str] | None = None

        while self._match_set(self.IDENTIFIER_TOKENS):
            token = self._prev
            comments = self._prev_comments

            has_dot = self._match(TokenType.DOT)
            curr_tt = self._curr.token_type

            if not has_dot:
                if curr_tt in self.TABLE_POSTFIX_TOKENS:
                    self._retreat(index)
                    return None
            elif curr_tt not in self.IDENTIFIER_TOKENS:
                self._retreat(index)
                return None

            if parts is None:
                parts = []

            if comments:
                if all_comments is None:
                    all_comments = []
                all_comments.extend(comments)
                self._prev_comments = []

            parts.append(
                self.expression(
                    exp.Identifier(
                        this=token.text, quoted=token.token_type == TokenType.IDENTIFIER
                    ),
                    token,
                )
            )

            if not has_dot:
                break

        if parts is None:
            return None

        n = len(parts)

        if n == 1:
            table: exp.Table = exp.Table(this=parts[0])
        elif n == 2:
            table = exp.Table(this=parts[1], db=parts[0])
        elif n >= 3:
            # Extra parts beyond catalog.db.table nest as Dot expressions
            this: exp.Identifier | exp.Dot = parts[2]
            for i in range(3, n):
                this = exp.Dot(this=this, expression=parts[i])

            table = exp.Table(this=this, db=parts[1], catalog=parts[0])

        # NOTE(review): defensive check — looks unreachable since n >= 1 is
        # covered by the branches above; confirm before relying on it
        if table is None:
            self._retreat(index)
        elif all_comments:
            table.add_comments(all_comments)
        return table

    def _parse_table_parts(
        self,
        schema: bool = False,
        is_db_reference: bool = False,
        wildcard: bool = False,
        fast: bool = False,
    ) -> exp.Table | exp.Dot | None:
        """Parse a (possibly dotted) table name into a Table node."""
        if fast:
            return self._parse_table_parts_fast()

        catalog: exp.Expr | str | None = None
        db: exp.Expr | str | None = None
        table: exp.Expr | str | None = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot(this=table, expression=self._parse_table_part(schema=schema))
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            # Fold a trailing * into the identifier (e.g. `db.tbl*`)
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        if is_db_reference:
            # Shift parts left: the last parsed part is the database, not a table
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(exp.Table(this=table, db=db, catalog=catalog))

        # Bubble up comments from identifier parts to the Table
        comments = []
        for part in table.parts:
            if part_comments := part.pop_comments():
                comments.extend(part_comments)
        if comments:
            table.add_comments(comments)

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Collection[TokenType] | None = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> exp.Expr | None:
        """Parse a table factor: plain table, stream, lateral, unnest, VALUES,
        subquery or ROWS FROM, plus trailing alias/sample/version/hints/pivots."""
        if not schema and not is_db_reference and not consume_pipe and not joins:
            # Fast path: a bare (optionally aliased) table name with nothing fancy
            index = self._index
            table = self._parse_table_parts(fast=True)

            if table is not None:
                # NOTE(review): assumes self._curr/self._next are non-None here —
                # confirm the fast path can't reach end of stream
                curr_tt = self._curr.token_type
                next_tt = self._next.token_type

                fast_terminators = self.TABLE_TERMINATORS

                # only return the table if we're sure there are no other operators
                # MATCH_CONDITION is a special case because it accepts any alias before it like LIMIT
                if curr_tt in fast_terminators and next_tt != TokenType.MATCH_CONDITION:
                    return table

                postfix_tokens = self.TABLE_POSTFIX_TOKENS

                if curr_tt not in postfix_tokens and next_tt not in postfix_tokens:
                    if alias := self._parse_table_alias(
                        alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
                    ):
                        table.set("alias", alias)

                        if self._curr.token_type in fast_terminators:
                            return table

            # Fast path failed: rewind and take the general path below
            self._retreat(index)

        if stream := self._parse_stream():
            return stream

        if lateral := self._parse_lateral():
            return lateral

        if unnest := self._parse_unnest():
            return unnest

        if values := self._parse_derived_table_values():
            return values

        if subquery := self._parse_select(table=True, consume_pipe=consume_pipe):
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table(this=bracket)) if bracket else None

        rows_from_tables = (
            self._parse_wrapped_csv(self._parse_table)
            if self._match_text_seq("ROWS", "FROM")
            else None
        )
        rows_from = (
            self.expression(exp.Table(rows_from=rows_from_tables)) if rows_from_tables else None
        )

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expr,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match(TokenType.STAR)

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        if self.dialect.ALIAS_POST_VERSION:
            this.set("version", self._parse_version())

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if self._match(TokenType.INDEXED_BY):
            this.set("indexed", self._parse_table_parts())
        elif self._match_text_seq("NOT", "INDEXED"):
            this.set("indexed", False)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex(this=this.to_column(copy=False), expression=self._parse_id_var())
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if not self.dialect.ALIAS_POST_VERSION:
            this.set("version", self._parse_version())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> exp.Version | None:
        """Parse a temporal-table version clause (FOR SYSTEM_TIME AS OF, etc.)."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: exp.Expr | None = self.expression(exp.Tuple(expressions=[start, end]))
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple(expressions=self._parse_wrapped_csv(self._parse_bitwise))
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version(this=this, expression=expression, kind=kind))

    def _parse_historical_data(self) -> exp.HistoricalData | None:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData(this=this, kind=kind, expression=expression)
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> exp.Changes | None:
        """Parse Snowflake's CHANGES (INFORMATION => ...) clause."""
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes(
                information=information,
                at_before=self._parse_historical_data(),
                end=self._parse_historical_data(),
            )
        )

    def _parse_unnest(self, with_alias: bool = True) -> exp.Unnest | None:
        """Parse UNNEST(...) with optional alias, ordinality and WITH OFFSET."""
        if not self._match_pair(TokenType.UNNEST, TokenType.L_PAREN, advance=False):
            return None

        self._advance()

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset: bool | exp.Expr = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                # The single alias names the column, not the table
                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            # An extra column alias beyond the expressions names the ordinality column
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest(expressions=expressions, alias=alias, offset=offset))

    def _parse_derived_table_values(self) -> exp.Values | None:
        """Parse a VALUES list, possibly wrapped as a derived table `(VALUES ...)`."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values(expressions=expressions, alias=alias or self._parse_table_alias())
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> exp.TableSample | None:
        """Parse TABLESAMPLE / USING SAMPLE with its many dialect variants."""
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            # NOTE(review): duplicated assignment below is a harmless typo
            bucket_denominator = bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample(
                expressions=expressions,
                method=method,
                bucket_numerator=bucket_numerator,
                bucket_denominator=bucket_denominator,
                bucket_field=bucket_field,
                percent=percent,
                size=size,
                seed=seed,
            )
        )

    def _parse_pivots(self) -> list[exp.Pivot] | None:
        # Consume consecutive PIVOT/UNPIVOT clauses
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        # Lazily yields joins until _parse_join returns None
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> exp.UnpivotColumns | None:
        """Parse UNPIVOT's INTO NAME ... VALUE ... clause."""
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns(
                this=self._match_text_seq("NAME") and self._parse_column(),
                expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
            )
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: bool | None = None) -> exp.Pivot:
        def _parse_on() -> exp.Expr | None:
            this =
self._parse_bitwise() 5019 5020 if self._match(TokenType.IN): 5021 # PIVOT ... ON col IN (row_val1, row_val2) 5022 return self._parse_in(this) 5023 if self._match(TokenType.ALIAS, advance=False): 5024 # UNPIVOT ... ON (col1, col2, col3) AS row_val 5025 return self._parse_alias(this) 5026 5027 return this 5028 5029 this = self._parse_table() 5030 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 5031 into = self._parse_unpivot_columns() 5032 using = self._match(TokenType.USING) and self._parse_csv( 5033 lambda: self._parse_alias(self._parse_column()) 5034 ) 5035 group = self._parse_group() 5036 5037 return self.expression( 5038 exp.Pivot( 5039 this=this, 5040 expressions=expressions, 5041 using=using, 5042 group=group, 5043 unpivot=is_unpivot, 5044 into=into, 5045 ) 5046 ) 5047 5048 def _parse_pivot_in(self) -> exp.In: 5049 def _parse_aliased_expression() -> exp.Expr | None: 5050 this = self._parse_select_or_expression() 5051 5052 self._match(TokenType.ALIAS) 5053 alias = self._parse_bitwise() 5054 if alias: 5055 if isinstance(alias, exp.Column) and not alias.db: 5056 alias = alias.this 5057 return self.expression(exp.PivotAlias(this=this, alias=alias)) 5058 5059 return this 5060 5061 value = self._parse_column() 5062 5063 if not self._match(TokenType.IN): 5064 self.raise_error("Expecting IN") 5065 5066 if self._match(TokenType.L_PAREN): 5067 if self._match(TokenType.ANY): 5068 exprs: list[exp.Expr] = ensure_list(exp.PivotAny(this=self._parse_order())) 5069 else: 5070 exprs = self._parse_csv(_parse_aliased_expression) 5071 self._match_r_paren() 5072 return self.expression(exp.In(this=value, expressions=exprs)) 5073 5074 return self.expression(exp.In(this=value, field=self._parse_id_var())) 5075 5076 def _parse_pivot_aggregation(self) -> exp.Expr | None: 5077 func = self._parse_function() 5078 if not func: 5079 if self._prev.token_type == TokenType.COMMA: 5080 return None 5081 self.raise_error("Expecting an aggregation function in PIVOT") 5082 
    def _parse_pivot(self) -> exp.Pivot | None:
        """Parse a single PIVOT/UNPIVOT clause.

        Covers the aggregation/column list, one or more FOR ... IN (...) field
        specs, DEFAULT ON NULL, an optional GROUP BY, an optional alias, and —
        for PIVOT — the inferred output column names.
        """
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            # The keyword wasn't followed by a paren, so this isn't a pivot
            # clause after all — restore the token position.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(self._parse_pivot_aggregation)

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        # Multiple FOR ... IN (...) specs may appear back to back.
        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot(
                expressions=expressions,
                fields=fields,
                unpivot=unpivot,
                include_nulls=include_nulls,
                default_on_null=default_on_null,
                group=group,
            )
        )

        # Only attach an alias if another PIVOT/UNPIVOT doesn't follow directly.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(list[exp.Expr], expressions))

            columns: list[exp.Expr] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: list[exp.Expr]) -> list[str]:
        # Name parts contributed by aliased pivot aggregations; dialects can override.
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> exp.PreWhere | None:
        """Parse a PREWHERE clause; `skip_where_token` assumes the keyword was
        already consumed by the caller."""
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        comments = self._prev_comments
        return self.expression(
            exp.PreWhere(this=self._parse_disjunction()),
            comments=comments,
        )

    def _parse_where(self, skip_where_token: bool = False) -> exp.Where | None:
        """Parse a WHERE clause; `skip_where_token` assumes the keyword was
        already consumed by the caller."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        comments = self._prev_comments
        return self.expression(
            exp.Where(this=self._parse_disjunction()),
            comments=comments,
        )
    def _parse_cube_or_rollup(self, with_prefix: bool = False) -> exp.Cube | exp.Rollup | None:
        # `with_prefix` marks the `WITH CUBE` / `WITH ROLLUP` suffix form,
        # which carries no expression list of its own.
        if self._match(TokenType.CUBE):
            kind: type[exp.Cube | exp.Rollup] = exp.Cube
        elif self._match(TokenType.ROLLUP):
            kind = exp.Rollup
        else:
            return None

        return self.expression(
            kind(expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_bitwise))
        )

    def _parse_grouping_sets(self) -> exp.GroupingSets | None:
        """Parse GROUPING SETS ( <set> [, ...] )."""
        if self._match(TokenType.GROUPING_SETS):
            return self.expression(
                exp.GroupingSets(expressions=self._parse_wrapped_csv(self._parse_grouping_set))
            )
        return None

    def _parse_grouping_set(self) -> exp.Expr | None:
        # A grouping set may itself be GROUPING SETS, CUBE/ROLLUP, or any expression.
        return self._parse_grouping_sets() or self._parse_cube_or_rollup() or self._parse_bitwise()

    def _parse_having(self, skip_having_token: bool = False) -> exp.Having | None:
        """Parse a HAVING clause; `skip_having_token` assumes the keyword was
        already consumed by the caller."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        comments = self._prev_comments
        return self.expression(
            exp.Having(this=self._parse_disjunction()),
            comments=comments,
        )

    def _parse_qualify(self) -> exp.Qualify | None:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify(this=self._parse_disjunction()))

    def _parse_connect_with_prior(self) -> exp.Expr | None:
        # Temporarily register PRIOR as a no-paren function parser so it is
        # only recognized inside the CONNECT BY condition, then remove it.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior(this=self._parse_bitwise())
        )
        connect = self._parse_disjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> exp.Connect | None:
        """Parse hierarchical-query clauses: START WITH ... CONNECT BY ...

        The START WITH clause may also follow CONNECT BY. When
        `skip_start_token` is True, parsing proceeds directly to CONNECT BY.
        """
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_disjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_disjunction()

        return self.expression(exp.Connect(start=start, connect=connect, nocycle=nocycle))

    def _parse_name_as_expression(self) -> exp.Expr | None:
        # <identifier> [AS <expr>] — used by the INTERPOLATE column list.
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias(alias=this, this=self._parse_disjunction()))
        return this
    def _parse_order(
        self, this: exp.Expr | None = None, skip_order_token: bool = False
    ) -> exp.Expr | None:
        """Parse ORDER BY (or ORDER SIBLINGS BY) into exp.Order.

        Returns `this` unchanged when no ORDER BY clause follows.
        """
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        comments = self._prev_comments
        return self.expression(
            exp.Order(
                this=this,
                expressions=self._parse_csv(self._parse_ordered),
                siblings=siblings,
            ),
            comments=comments,
        )

    def _parse_sort(self, exp_class: type[E], token: TokenType) -> E | None:
        # Generic parser for sort-like clauses keyed by a single token
        # (e.g. SORT BY / CLUSTER BY-style constructs).
        if not self._match(token):
            return None
        return self.expression(exp_class(expressions=self._parse_csv(self._parse_ordered)))

    def _parse_ordered(
        self, parse_method: t.Callable[[], exp.Expr | None] | None = None
    ) -> exp.Ordered | None:
        """Parse one ORDER BY item: expression, ASC/DESC, NULLS FIRST/LAST and
        an optional WITH FILL modifier."""
        this = parse_method() if parse_method else self._parse_disjunction()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc: bool | None = True if self._match(TokenType.DESC) else (False if asc else None)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When the null ordering is left implicit, infer it from the dialect's
        # NULL_ORDERING setting relative to the sort direction.
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill(
                    from_=self._match(TokenType.FROM) and self._parse_bitwise(),
                    to=self._match_text_seq("TO") and self._parse_bitwise(),
                    step=self._match_text_seq("STEP") and self._parse_bitwise(),
                    interpolate=self._parse_interpolate(),
                )
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered(this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill)
        )

    def _parse_limit_options(self) -> exp.LimitOptions | None:
        # Trailing FETCH/LIMIT options: [PERCENT] [ROW | ROWS] [ONLY | WITH TIES]
        percent = self._match_set((TokenType.PERCENT, TokenType.MOD))
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")

        if not (percent or rows or with_ties):
            return None

        return self.expression(exp.LimitOptions(percent=percent, rows=rows, with_ties=with_ties))
For that matter, we backtrack and instead 5433 # consume the factor plus parse the percentage separately 5434 index = self._index 5435 expression = self._try_parse(self._parse_term) 5436 if isinstance(expression, exp.Mod): 5437 self._retreat(index) 5438 expression = self._parse_factor() 5439 elif not expression: 5440 expression = self._parse_factor() 5441 limit_options = self._parse_limit_options() 5442 5443 if self._match(TokenType.COMMA): 5444 offset = expression 5445 expression = self._parse_term() 5446 else: 5447 offset = None 5448 5449 limit_exp = self.expression( 5450 exp.Limit( 5451 this=this, 5452 expression=expression, 5453 offset=offset, 5454 limit_options=limit_options, 5455 expressions=self._parse_limit_by(), 5456 ), 5457 comments=comments, 5458 ) 5459 5460 return limit_exp 5461 5462 if self._match(TokenType.FETCH): 5463 direction = ( 5464 self._prev.text.upper() 5465 if self._match_set((TokenType.FIRST, TokenType.NEXT)) 5466 else "FIRST" 5467 ) 5468 5469 count = self._parse_field(tokens=self.FETCH_TOKENS) 5470 5471 return self.expression( 5472 exp.Fetch( 5473 direction=direction, count=count, limit_options=self._parse_limit_options() 5474 ) 5475 ) 5476 5477 return this 5478 5479 def _parse_offset(self, this: exp.Expr | None = None) -> exp.Expr | None: 5480 if not self._match(TokenType.OFFSET): 5481 return this 5482 5483 count = self._parse_term() 5484 self._match_set((TokenType.ROW, TokenType.ROWS)) 5485 5486 return self.expression( 5487 exp.Offset(this=this, expression=count, expressions=self._parse_limit_by()) 5488 ) 5489 5490 def _can_parse_limit_or_offset(self) -> bool: 5491 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 5492 return False 5493 5494 index = self._index 5495 result = bool( 5496 self._try_parse(self._parse_limit, retreat=True) 5497 or self._try_parse(self._parse_offset, retreat=True) 5498 ) 5499 self._retreat(index) 5500 5501 # MATCH_CONDITION (...) 
is a special construct that should not be consumed by limit/offset 5502 if self._next.token_type == TokenType.MATCH_CONDITION: 5503 result = False 5504 5505 return result 5506 5507 def _parse_limit_by(self) -> list[exp.Expr] | None: 5508 return self._parse_csv(self._parse_bitwise) if self._match_text_seq("BY") else None 5509 5510 def _parse_locks(self) -> list[exp.Lock]: 5511 locks = [] 5512 while True: 5513 update, key = None, None 5514 if self._match_text_seq("FOR", "UPDATE"): 5515 update = True 5516 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 5517 "LOCK", "IN", "SHARE", "MODE" 5518 ): 5519 update = False 5520 elif self._match_text_seq("FOR", "KEY", "SHARE"): 5521 update, key = False, True 5522 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 5523 update, key = True, True 5524 else: 5525 break 5526 5527 expressions = None 5528 if self._match_text_seq("OF"): 5529 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 5530 5531 wait: bool | exp.Expr | None = None 5532 if self._match_text_seq("NOWAIT"): 5533 wait = True 5534 elif self._match_text_seq("WAIT"): 5535 wait = self._parse_primary() 5536 elif self._match_text_seq("SKIP", "LOCKED"): 5537 wait = False 5538 5539 locks.append( 5540 self.expression( 5541 exp.Lock(update=update, expressions=expressions, wait=wait, key=key) 5542 ) 5543 ) 5544 5545 return locks 5546 5547 def parse_set_operation( 5548 self, this: exp.Expr | None, consume_pipe: bool = False 5549 ) -> exp.Expr | None: 5550 start = self._index 5551 _, side_token, kind_token = self._parse_join_parts() 5552 5553 side = side_token.text if side_token else None 5554 kind = kind_token.text if kind_token else None 5555 5556 if not self._match_set(self.SET_OPERATIONS): 5557 self._retreat(start) 5558 return None 5559 5560 token_type = self._prev.token_type 5561 5562 if token_type == TokenType.UNION: 5563 operation: type[exp.SetOperation] = exp.Union 5564 elif token_type == TokenType.EXCEPT: 5565 operation = 
exp.Except 5566 else: 5567 operation = exp.Intersect 5568 5569 comments = self._prev.comments 5570 5571 if self._match(TokenType.DISTINCT): 5572 distinct: bool | None = True 5573 elif self._match(TokenType.ALL): 5574 distinct = False 5575 else: 5576 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 5577 if distinct is None: 5578 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 5579 5580 by_name = ( 5581 self._match_text_seq("BY", "NAME") 5582 or self._match_text_seq("STRICT", "CORRESPONDING") 5583 or None 5584 ) 5585 if self._match_text_seq("CORRESPONDING"): 5586 by_name = True 5587 if not side and not kind: 5588 kind = "INNER" 5589 5590 on_column_list = None 5591 if by_name and self._match_texts(("ON", "BY")): 5592 on_column_list = self._parse_wrapped_csv(self._parse_column) 5593 5594 expression = self._parse_select( 5595 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 5596 ) 5597 5598 return self.expression( 5599 operation( 5600 this=this, 5601 distinct=distinct, 5602 by_name=by_name, 5603 expression=expression, 5604 side=side, 5605 kind=kind, 5606 on=on_column_list, 5607 ), 5608 comments=comments, 5609 ) 5610 5611 def _parse_set_operations(self, this: exp.Expr | None) -> exp.Expr | None: 5612 while this: 5613 setop = self.parse_set_operation(this) 5614 if not setop: 5615 break 5616 this = setop 5617 5618 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 5619 expression = this.expression 5620 5621 if expression: 5622 for arg in self.SET_OP_MODIFIERS: 5623 expr = expression.args.get(arg) 5624 if expr: 5625 this.set(arg, expr.pop()) 5626 5627 return this 5628 5629 def _parse_expression(self) -> exp.Expr | None: 5630 return self._parse_alias(self._parse_assignment()) 5631 5632 def _parse_assignment(self) -> exp.Expr | None: 5633 this = self._parse_disjunction() 5634 if not this and self._next.token_type in self.ASSIGNMENT: 5635 # This allows us to parse <non-identifier token> := <expr> 
5636 this = exp.column( 5637 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 5638 ) 5639 5640 while self._match_set(self.ASSIGNMENT): 5641 if isinstance(this, exp.Column) and len(this.parts) == 1: 5642 this = this.this 5643 5644 comments = self._prev_comments 5645 this = self.expression( 5646 self.ASSIGNMENT[self._prev.token_type]( 5647 this=this, expression=self._parse_assignment() 5648 ), 5649 comments=comments, 5650 ) 5651 5652 return this 5653 5654 def _parse_disjunction(self) -> exp.Expr | None: 5655 this = self._parse_conjunction() 5656 while self._match_set(self.DISJUNCTION): 5657 comments = self._prev_comments 5658 this = self.expression( 5659 self.DISJUNCTION[self._prev.token_type]( 5660 this=this, expression=self._parse_conjunction() 5661 ), 5662 comments=comments, 5663 ) 5664 return this 5665 5666 def _parse_conjunction(self) -> exp.Expr | None: 5667 this = self._parse_equality() 5668 while self._match_set(self.CONJUNCTION): 5669 comments = self._prev_comments 5670 this = self.expression( 5671 self.CONJUNCTION[self._prev.token_type]( 5672 this=this, expression=self._parse_equality() 5673 ), 5674 comments=comments, 5675 ) 5676 return this 5677 5678 def _parse_equality(self) -> exp.Expr | None: 5679 this = self._parse_comparison() 5680 while self._match_set(self.EQUALITY): 5681 comments = self._prev_comments 5682 this = self.expression( 5683 self.EQUALITY[self._prev.token_type]( 5684 this=this, expression=self._parse_comparison() 5685 ), 5686 comments=comments, 5687 ) 5688 return this 5689 5690 def _parse_comparison(self) -> exp.Expr | None: 5691 this = self._parse_range() 5692 while self._match_set(self.COMPARISON): 5693 comments = self._prev_comments 5694 this = self.expression( 5695 self.COMPARISON[self._prev.token_type](this=this, expression=self._parse_range()), 5696 comments=comments, 5697 ) 5698 return this 5699 5700 def _parse_range(self, this: exp.Expr | None = None) -> exp.Expr | None: 5701 this = this or 
    def _negate_range(self, this: exp.Expr | None = None) -> exp.Expr | None:
        # Wrap a parsed range predicate in NOT; no-op when nothing was parsed.
        if not this:
            return this

        return self.expression(exp.Not(this=this))

    def _parse_is(self, this: exp.Expr | None) -> exp.Expr | None:
        """Parse the right-hand side of an IS predicate.

        Supports IS [NOT] DISTINCT FROM, IS [NOT] JSON [<kind>] [WITH | WITHOUT
        [UNIQUE] KEYS], and IS [NOT] <null or expression>. Returns None (after
        backtracking) when no valid operand follows.
        """
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM -> null-safe equality; IS DISTINCT FROM -> null-safe inequality.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass(this=this, expression=self._parse_bitwise()))

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: exp.Expr | None = self.expression(
                exp.JSON(this=kind, with_=_with, unique=unique)
            )
        else:
            expression = self._parse_null() or self._parse_bitwise()
            if not expression:
                # Not actually an IS predicate — restore position before IS.
                self._retreat(index)
                return None

        this = self.expression(exp.Is(this=this, expression=expression))
        this = self.expression(exp.Not(this=this)) if negate else this
        return self._parse_column_ops(this)

    def _parse_in(self, this: exp.Expr | None, alias: bool = False) -> exp.In:
        """Parse the right-hand side of an IN predicate: an UNNEST call, a
        parenthesized/bracketed value list or subquery, or a bare field name."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In(this=this, unnest=unnest))
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(query := expressions[0], exp.Query):
                # Single subquery: IN (SELECT ...)
                this = self.expression(
                    exp.In(this=this, query=self._parse_query_modifiers(query).subquery(copy=False))
                )
            else:
                this = self.expression(exp.In(this=this, expressions=expressions))

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In(this=this, field=self._parse_column()))

        return this

    def _parse_between(self, this: exp.Expr | None) -> exp.Between:
        """Parse [SYMMETRIC | ASYMMETRIC] <low> AND <high> after BETWEEN."""
        symmetric = None
        if self._match_text_seq("SYMMETRIC"):
            symmetric = True
        elif self._match_text_seq("ASYMMETRIC"):
            symmetric = False

        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()

        return self.expression(exp.Between(this=this, low=low, high=high, symmetric=symmetric))

    def _parse_escape(self, this: exp.Expr | None) -> exp.Expr | None:
        # Optional ESCAPE '<char>' suffix (e.g. after LIKE).
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(
            exp.Escape(this=this, expression=self._parse_string() or self._parse_null())
        )
    def _parse_interval(self, require_interval: bool = True) -> exp.Add | exp.Interval | None:
        """Parse an INTERVAL expression.

        Adjacent interval literals (INTERVAL 'v1' u1 [+] 'v2' u2 ...) are folded
        into a sum of intervals via recursion.

        Args:
            require_interval: when True, the INTERVAL keyword must be present.
        """
        index = self._index

        if not self._match(TokenType.INTERVAL) and require_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and self._curr
            and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS
        ):
            # An unquoted single-part column not followed by a valid unit is
            # presumably not an interval at all — backtrack.
            self._retreat(index)
            return None

        interval = self._parse_interval_span(this)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(exp.Add(this=interval, expression=self._parse_interval(False)))

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> exp.Expr | None:
        """Parse bitwise-precedence binary operators, plus `||`, `??` and the
        `<<` / `>>` shifts built from adjacent LT/GT tokens."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type](this=this, expression=self._parse_term())
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe(
                        this=this,
                        expression=self._parse_term(),
                        safe=not self.dialect.STRICT_STRING_CONCAT,
                    )
                )
            elif self._match(TokenType.DQMARK):
                # `a ?? b` is canonicalized to COALESCE(a, b)
                this = self.expression(
                    exp.Coalesce(this=this, expressions=ensure_list(self._parse_term()))
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift(this=this, expression=self._parse_term())
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift(this=this, expression=self._parse_term())
                )
            else:
                break

        return this
self._parse_factor() 5938 5939 while self._match_set(self.TERM): 5940 klass = self.TERM[self._prev.token_type] 5941 comments = self._prev_comments 5942 expression = self._parse_factor() 5943 5944 this = self.expression(klass(this=this, expression=expression), comments=comments) 5945 5946 if isinstance(this, exp.Collate): 5947 expr = this.expression 5948 5949 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5950 # fallback to Identifier / Var 5951 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5952 ident = expr.this 5953 if isinstance(ident, exp.Identifier): 5954 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5955 5956 return this 5957 5958 def _parse_factor(self) -> exp.Expr | None: 5959 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5960 this = self._parse_at_time_zone(parse_method()) 5961 5962 while self._match_set(self.FACTOR): 5963 klass = self.FACTOR[self._prev.token_type] 5964 comments = self._prev_comments 5965 expression = parse_method() 5966 5967 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5968 self._retreat(self._index - 1) 5969 return this 5970 5971 this = self.expression(klass(this=this, expression=expression), comments=comments) 5972 5973 if isinstance(this, exp.Div): 5974 this.set("typed", self.dialect.TYPED_DIVISION) 5975 this.set("safe", self.dialect.SAFE_DIVISION) 5976 5977 return this 5978 5979 def _parse_exponent(self) -> exp.Expr | None: 5980 this = self._parse_unary() 5981 while self._match_set(self.EXPONENT): 5982 comments = self._prev_comments 5983 this = self.expression( 5984 self.EXPONENT[self._prev.token_type](this=this, expression=self._parse_unary()), 5985 comments=comments, 5986 ) 5987 return this 5988 5989 def _parse_unary(self) -> exp.Expr | None: 5990 if self._match_set(self.UNARY_PARSERS): 5991 return self.UNARY_PARSERS[self._prev.token_type](self) 5992 return self._parse_type() 5993 5994 def _parse_type( 
5995 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5996 ) -> exp.Expr | None: 5997 if not fallback_to_identifier and (atom := self._parse_atom()) is not None: 5998 return atom 5999 6000 if interval := parse_interval and self._parse_interval(): 6001 return self._parse_column_ops(interval) 6002 6003 index = self._index 6004 data_type = self._parse_types(check_func=True, allow_identifiers=False) 6005 6006 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 6007 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 6008 if isinstance(data_type, exp.Cast): 6009 # This constructor can contain ops directly after it, for instance struct unnesting: 6010 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 6011 return self._parse_column_ops(data_type) 6012 6013 if data_type: 6014 index2 = self._index 6015 this = self._parse_primary() 6016 6017 if isinstance(this, exp.Literal): 6018 literal = this.name 6019 this = self._parse_column_ops(this) 6020 6021 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 6022 if parser: 6023 return parser(self, this, data_type) 6024 6025 if ( 6026 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 6027 and data_type.is_type(exp.DType.TIMESTAMP) 6028 and TIME_ZONE_RE.search(literal) 6029 ): 6030 data_type = exp.DType.TIMESTAMPTZ.into_expr() 6031 6032 return self.expression(exp.Cast(this=this, to=data_type)) 6033 6034 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 6035 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 6036 # 6037 # If the index difference here is greater than 1, that means the parser itself must have 6038 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 
6039 # 6040 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 6041 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 6042 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 6043 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 6044 # 6045 # In these cases, we don't really want to return the converted type, but instead retreat 6046 # and try to parse a Column or Identifier in the section below. 6047 if data_type.expressions and index2 - index > 1: 6048 self._retreat(index2) 6049 return self._parse_column_ops(data_type) 6050 6051 self._retreat(index) 6052 6053 if fallback_to_identifier: 6054 return self._parse_id_var() 6055 6056 return self._parse_column() 6057 6058 def _parse_type_size(self) -> exp.DataTypeParam | None: 6059 this = self._parse_type() 6060 if not this: 6061 return None 6062 6063 if isinstance(this, exp.Column) and not this.table: 6064 this = exp.var(this.name.upper()) 6065 6066 return self.expression( 6067 exp.DataTypeParam(this=this, expression=self._parse_var(any_token=True)) 6068 ) 6069 6070 def _parse_user_defined_type(self, identifier: exp.Identifier) -> exp.Expr | None: 6071 type_name = identifier.name 6072 6073 while self._match(TokenType.DOT): 6074 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 6075 6076 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 6077 6078 def _parse_types( 6079 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 6080 ) -> exp.Expr | None: 6081 index = self._index 6082 this: exp.Expr | None = None 6083 6084 if self._match_set(self.TYPE_TOKENS): 6085 type_token = self._prev.token_type 6086 else: 6087 type_token = None 6088 identifier = allow_identifiers and self._parse_id_var( 6089 any_token=False, tokens=(TokenType.VAR,) 6090 ) 6091 if isinstance(identifier, exp.Identifier): 6092 try: 6093 tokens = 
self.dialect.tokenize(identifier.name) 6094 except TokenError: 6095 tokens = None 6096 6097 if tokens and (type_token := tokens[0].token_type) in self.TYPE_TOKENS: 6098 if len(tokens) > 1: 6099 return exp.DataType.build(identifier.name, dialect=self.dialect) 6100 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 6101 this = self._parse_user_defined_type(identifier) 6102 else: 6103 self._retreat(self._index - 1) 6104 return None 6105 else: 6106 return None 6107 6108 if type_token == TokenType.PSEUDO_TYPE: 6109 return self.expression(exp.PseudoType(this=self._prev.text.upper())) 6110 6111 if type_token == TokenType.OBJECT_IDENTIFIER: 6112 return self.expression(exp.ObjectIdentifier(this=self._prev.text.upper())) 6113 6114 # https://materialize.com/docs/sql/types/map/ 6115 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 6116 key_type = self._parse_types( 6117 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 6118 ) 6119 if not self._match(TokenType.FARROW): 6120 self._retreat(index) 6121 return None 6122 6123 value_type = self._parse_types( 6124 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 6125 ) 6126 if not self._match(TokenType.R_BRACKET): 6127 self._retreat(index) 6128 return None 6129 6130 return exp.DataType( 6131 this=exp.DType.MAP, 6132 expressions=[key_type, value_type], 6133 nested=True, 6134 ) 6135 6136 nested = type_token in self.NESTED_TYPE_TOKENS 6137 is_struct = type_token in self.STRUCT_TYPE_TOKENS 6138 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 6139 expressions = None 6140 maybe_func = False 6141 6142 if self._match(TokenType.L_PAREN): 6143 if is_struct: 6144 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 6145 elif nested: 6146 expressions = self._parse_csv( 6147 lambda: self._parse_types( 6148 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 6149 ) 6150 ) 6151 if type_token == TokenType.NULLABLE and 
len(expressions) == 1: 6152 this = expressions[0] 6153 this.set("nullable", True) 6154 self._match_r_paren() 6155 return this 6156 elif type_token in self.ENUM_TYPE_TOKENS: 6157 expressions = self._parse_csv(self._parse_equality) 6158 elif type_token == TokenType.JSON: 6159 # ClickHouse JSON type supports arguments: JSON(col Type, SKIP col, param=value) 6160 # https://clickhouse.com/docs/sql-reference/data-types/newjson 6161 expressions = self._parse_csv(self._parse_json_type_arg) 6162 elif is_aggregate: 6163 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 6164 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 6165 ) 6166 if not func_or_ident: 6167 return None 6168 expressions = [func_or_ident] 6169 if self._match(TokenType.COMMA): 6170 expressions.extend( 6171 self._parse_csv( 6172 lambda: self._parse_types( 6173 check_func=check_func, 6174 schema=schema, 6175 allow_identifiers=allow_identifiers, 6176 ) 6177 ) 6178 ) 6179 else: 6180 expressions = self._parse_csv(self._parse_type_size) 6181 6182 # https://docs.snowflake.com/en/sql-reference/data-types-vector 6183 if type_token == TokenType.VECTOR and len(expressions) == 2: 6184 expressions = self._parse_vector_expressions(expressions) 6185 6186 if not self._match(TokenType.R_PAREN): 6187 self._retreat(index) 6188 return None 6189 6190 maybe_func = True 6191 6192 values: list[exp.Expr] | None = None 6193 6194 if nested and self._match(TokenType.LT): 6195 if is_struct: 6196 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 6197 else: 6198 expressions = self._parse_csv( 6199 lambda: self._parse_types( 6200 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 6201 ) 6202 ) 6203 6204 if not self._match(TokenType.GT): 6205 self.raise_error("Expecting >") 6206 6207 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 6208 values = self._parse_csv(self._parse_disjunction) 6209 if not values and is_struct: 6210 values = None 
6211 self._retreat(self._index - 1) 6212 else: 6213 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 6214 6215 if type_token in self.TIMESTAMPS: 6216 if self._match_text_seq("WITH", "TIME", "ZONE"): 6217 maybe_func = False 6218 tz_type = exp.DType.TIMETZ if type_token in self.TIMES else exp.DType.TIMESTAMPTZ 6219 this = exp.DataType(this=tz_type, expressions=expressions) 6220 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 6221 maybe_func = False 6222 this = exp.DataType(this=exp.DType.TIMESTAMPLTZ, expressions=expressions) 6223 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 6224 maybe_func = False 6225 elif type_token == TokenType.INTERVAL: 6226 if self._curr.text.upper() in self.dialect.VALID_INTERVAL_UNITS: 6227 unit = self._parse_var(upper=True) 6228 if self._match_text_seq("TO"): 6229 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 6230 6231 this = self.expression(exp.DataType(this=self.expression(exp.Interval(unit=unit)))) 6232 else: 6233 this = self.expression(exp.DataType(this=exp.DType.INTERVAL)) 6234 elif type_token == TokenType.VOID: 6235 this = exp.DataType(this=exp.DType.NULL) 6236 6237 if maybe_func and check_func: 6238 index2 = self._index 6239 peek = self._parse_string() 6240 6241 if not peek: 6242 self._retreat(index) 6243 return None 6244 6245 self._retreat(index2) 6246 6247 if not this: 6248 assert type_token is not None 6249 if self._match_text_seq("UNSIGNED"): 6250 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 6251 if not unsigned_type_token: 6252 self.raise_error(f"Cannot convert {type_token.name} to unsigned.") 6253 6254 type_token = unsigned_type_token or type_token 6255 6256 # NULLABLE without parentheses can be a column (Presto/Trino) 6257 if type_token == TokenType.NULLABLE and not expressions: 6258 self._retreat(index) 6259 return None 6260 6261 this = exp.DataType( 6262 this=exp.DType[type_token.name], 6263 expressions=expressions, 6264 nested=nested, 6265 
) 6266 6267 # Empty arrays/structs are allowed 6268 if values is not None: 6269 cls = exp.Struct if is_struct else exp.Array 6270 this = exp.cast(cls(expressions=values), this, copy=False) 6271 6272 elif expressions: 6273 this.set("expressions", expressions) 6274 6275 # https://materialize.com/docs/sql/types/list/#type-name 6276 while self._match(TokenType.LIST): 6277 this = exp.DataType(this=exp.DType.LIST, expressions=[this], nested=True) 6278 6279 index = self._index 6280 6281 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 6282 matched_array = self._match(TokenType.ARRAY) 6283 6284 while self._curr: 6285 datatype_token = self._prev.token_type 6286 matched_l_bracket = self._match(TokenType.L_BRACKET) 6287 6288 if (not matched_l_bracket and not matched_array) or ( 6289 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 6290 ): 6291 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 6292 # not to be confused with the fixed size array parsing 6293 break 6294 6295 matched_array = False 6296 values = self._parse_csv(self._parse_disjunction) or None 6297 if ( 6298 values 6299 and not schema 6300 and ( 6301 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS 6302 or datatype_token == TokenType.ARRAY 6303 or not self._match(TokenType.R_BRACKET, advance=False) 6304 ) 6305 ): 6306 # Retreating here means that we should not parse the following values as part of the data type, e.g. 
in DuckDB 6307 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 6308 self._retreat(index) 6309 break 6310 6311 this = exp.DataType( 6312 this=exp.DType.ARRAY, expressions=[this], values=values, nested=True 6313 ) 6314 self._match(TokenType.R_BRACKET) 6315 6316 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DType): 6317 converter = self.TYPE_CONVERTERS.get(this.this) 6318 if converter: 6319 this = converter(t.cast(exp.DataType, this)) 6320 6321 return this 6322 6323 def _parse_json_type_arg(self) -> exp.Expr | None: 6324 """Parse a single argument to ClickHouse's JSON type.""" 6325 6326 # SKIP col or SKIP REGEXP 'pattern' 6327 if self._match_text_seq("SKIP"): 6328 regexp = self._match(TokenType.RLIKE) 6329 arg = self._parse_column() 6330 if isinstance(arg, exp.Column): 6331 arg = arg.to_dot() 6332 return self.expression(exp.SkipJSONColumn(regexp=regexp, expression=arg)) 6333 6334 param_or_col = self._parse_column() 6335 if not isinstance(param_or_col, exp.Column): 6336 return None 6337 6338 # Parameter: name=value (e.g., max_dynamic_paths=2) 6339 if len(param_or_col.parts) == 1 and self._match(TokenType.EQ): 6340 param = param_or_col.name 6341 value = self._parse_primary() 6342 return self.expression(exp.EQ(this=exp.var(param), expression=value)) 6343 6344 # Column type hint: col_name Type 6345 col = param_or_col.to_dot() 6346 kind = self._parse_types(check_func=False, allow_identifiers=False) 6347 return self.expression(exp.ColumnDef(this=col, kind=kind)) 6348 6349 def _parse_vector_expressions(self, expressions: list[exp.Expr]) -> list[exp.Expr]: 6350 return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]] 6351 6352 def _parse_struct_types(self, type_required: bool = False) -> exp.Expr | None: 6353 index = self._index 6354 6355 if ( 6356 self._curr 6357 and self._next 6358 and self._curr.token_type in self.TYPE_TOKENS 6359 and 
self._next.token_type in self.TYPE_TOKENS 6360 ): 6361 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 6362 # type token. Without this, the list will be parsed as a type and we'll eventually crash 6363 this = self._parse_id_var() 6364 else: 6365 this = ( 6366 self._parse_type(parse_interval=False, fallback_to_identifier=True) 6367 or self._parse_id_var() 6368 ) 6369 6370 self._match(TokenType.COLON) 6371 6372 if ( 6373 type_required 6374 and not isinstance(this, exp.DataType) 6375 and not self._match_set(self.TYPE_TOKENS, advance=False) 6376 ): 6377 self._retreat(index) 6378 return self._parse_types() 6379 6380 return self._parse_column_def(this) 6381 6382 def _parse_at_time_zone(self, this: exp.Expr | None) -> exp.Expr | None: 6383 if not self._match_text_seq("AT", "TIME", "ZONE"): 6384 return this 6385 return self._parse_at_time_zone( 6386 self.expression(exp.AtTimeZone(this=this, zone=self._parse_unary())) 6387 ) 6388 6389 def _parse_atom(self) -> exp.Expr | None: 6390 if ( 6391 self._curr.token_type in self.IDENTIFIER_TOKENS 6392 and (column := self._parse_column()) is not None 6393 ): 6394 return column 6395 6396 token = self._curr 6397 token_type = token.token_type 6398 6399 if not (primary_parser := self.PRIMARY_PARSERS.get(token_type)): 6400 return None 6401 6402 next_type = self._next.token_type 6403 6404 if ( 6405 next_type in self.COLUMN_OPERATORS 6406 or next_type in self.COLUMN_POSTFIX_TOKENS 6407 or (token_type == TokenType.STRING and next_type == TokenType.STRING) 6408 ): 6409 return None 6410 6411 self._advance() 6412 return primary_parser(self, token) 6413 6414 def _parse_column(self) -> exp.Expr | None: 6415 column: exp.Expr | None = self._parse_column_parts_fast() 6416 if column is None: 6417 this = self._parse_column_reference() 6418 if not this: 6419 this = self._parse_bracket(this) 6420 column = self._parse_column_ops(this) if this else this 6421 6422 if column: 6423 if 
self.dialect.SUPPORTS_COLUMN_JOIN_MARKS: 6424 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 6425 if self.COLON_IS_VARIANT_EXTRACT: 6426 column = self._parse_colon_as_variant_extract(column) 6427 6428 return column 6429 6430 def _parse_column_parts_fast(self) -> exp.Column | exp.Dot | None: 6431 """Fast path for simple column and dot references (a, a.b, ...). 6432 6433 Greedily consumes VAR/IDENTIFIER tokens separated by DOTs, then checks 6434 that nothing complex follows. If it does, retreats and returns None so 6435 the slow path can handle it. For >4 parts, wraps in exp.Dot nodes. 6436 """ 6437 index = self._index 6438 parts: list[exp.Identifier] | None = None 6439 all_comments: list[str] | None = None 6440 6441 while self._match_set(self.IDENTIFIER_TOKENS): 6442 token = self._prev 6443 comments = self._prev_comments 6444 6445 if parts is None and token.text.upper() in self.NO_PAREN_FUNCTION_PARSERS: 6446 self._retreat(index) 6447 return None 6448 6449 has_dot = self._match(TokenType.DOT) 6450 curr_tt = self._curr.token_type 6451 6452 if not has_dot: 6453 if curr_tt in self.COLUMN_OPERATORS or curr_tt in self.COLUMN_POSTFIX_TOKENS: 6454 self._retreat(index) 6455 return None 6456 elif curr_tt not in self.IDENTIFIER_TOKENS: 6457 self._retreat(index) 6458 return None 6459 6460 if parts is None: 6461 parts = [] 6462 6463 if comments: 6464 if all_comments is None: 6465 all_comments = [] 6466 all_comments.extend(comments) 6467 self._prev_comments = [] 6468 6469 parts.append( 6470 self.expression( 6471 exp.Identifier( 6472 this=token.text, quoted=token.token_type == TokenType.IDENTIFIER 6473 ), 6474 token, 6475 ) 6476 ) 6477 6478 if not has_dot: 6479 break 6480 6481 if parts is None: 6482 return None 6483 6484 n = len(parts) 6485 6486 if n == 1: 6487 column: exp.Column | exp.Dot = exp.Column(this=parts[0]) 6488 elif n == 2: 6489 column = exp.Column(this=parts[1], table=parts[0]) 6490 elif n == 3: 6491 column = exp.Column(this=parts[2], table=parts[1], 
db=parts[0]) 6492 else: 6493 column = exp.Column(this=parts[3], table=parts[2], db=parts[1], catalog=parts[0]) 6494 6495 for i in range(4, n): 6496 column = exp.Dot(this=column, expression=parts[i]) 6497 6498 if all_comments: 6499 column.add_comments(all_comments) 6500 6501 return column 6502 6503 def _parse_column_reference(self) -> exp.Expr | None: 6504 this = self._parse_field() 6505 if ( 6506 not this 6507 and self._match(TokenType.VALUES, advance=False) 6508 and self.VALUES_FOLLOWED_BY_PAREN 6509 and (not self._next or self._next.token_type != TokenType.L_PAREN) 6510 ): 6511 this = self._parse_id_var() 6512 6513 if isinstance(this, exp.Identifier): 6514 # We bubble up comments from the Identifier to the Column 6515 this = self.expression(exp.Column(this=this), comments=this.pop_comments()) 6516 6517 return this 6518 6519 def _build_json_extract( 6520 self, 6521 this: exp.Expr | None, 6522 path_parts: list[exp.JSONPathPart], 6523 escape: bool | None, 6524 ) -> tuple[exp.Expr | None, list[exp.JSONPathPart]]: 6525 if len(path_parts) > 1: 6526 this = self.expression( 6527 exp.JSONExtract( 6528 this=this, 6529 expression=exp.JSONPath(expressions=path_parts, escape=escape), 6530 variant_extract=True, 6531 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 6532 ) 6533 ) 6534 path_parts = [exp.JSONPathRoot()] 6535 6536 return this, path_parts 6537 6538 def _parse_colon_as_variant_extract(self, this: exp.Expr | None) -> exp.Expr | None: 6539 path_parts: list[exp.JSONPathPart] = [exp.JSONPathRoot()] 6540 escape = None 6541 6542 while self._match(TokenType.COLON): 6543 key = self._parse_id_var(any_token=True, tokens=(TokenType.SELECT,)) 6544 6545 if key: 6546 if isinstance(key, exp.Identifier) and key.quoted: 6547 escape = True 6548 path_parts.append(exp.JSONPathKey(this=key.name)) 6549 6550 while True: 6551 if self._match(TokenType.DOT): 6552 next_key = self._parse_id_var(any_token=True, tokens=(TokenType.SELECT,)) 6553 6554 if next_key: 6555 if 
isinstance(next_key, exp.Identifier) and next_key.quoted: 6556 escape = True 6557 path_parts.append(exp.JSONPathKey(this=next_key.name)) 6558 elif self._match(TokenType.L_BRACKET): 6559 bracket_expr = self._parse_bracket_key_value() 6560 6561 if not self._match(TokenType.R_BRACKET): 6562 self.raise_error("Expected ]") 6563 6564 if bracket_expr: 6565 if bracket_expr.is_string: 6566 path_parts.append(exp.JSONPathKey(this=bracket_expr.name)) 6567 escape = True 6568 elif bracket_expr.is_star: 6569 path_parts.append(exp.JSONPathSubscript(this=exp.JSONPathWildcard())) 6570 elif bracket_expr.is_number: 6571 path_parts.append(exp.JSONPathSubscript(this=bracket_expr.to_py())) 6572 else: 6573 this, path_parts = self._build_json_extract(this, path_parts, escape) 6574 escape = None 6575 6576 this = self.expression( 6577 exp.Bracket( 6578 this=this, expressions=[bracket_expr], json_access=True 6579 ), 6580 ) 6581 6582 elif self._match(TokenType.DCOLON): 6583 this, path_parts = self._build_json_extract(this, path_parts, escape) 6584 escape = None 6585 6586 cast_type = self._parse_types() 6587 if cast_type: 6588 this = self.expression(exp.Cast(this=this, to=cast_type)) 6589 else: 6590 self.raise_error("Expected type after '::'") 6591 else: 6592 break 6593 6594 this, _ = self._build_json_extract(this, path_parts, escape) 6595 6596 return this 6597 6598 def _parse_dcolon(self) -> exp.Expr | None: 6599 return self._parse_types() 6600 6601 def _parse_column_ops(self, this: exp.Expr | None) -> exp.Expr | None: 6602 while self._curr.token_type in self.BRACKETS: 6603 this = self._parse_bracket(this) 6604 6605 column_operators = self.COLUMN_OPERATORS 6606 cast_column_operators = self.CAST_COLUMN_OPERATORS 6607 while self._curr: 6608 op_token = self._curr.token_type 6609 6610 if op_token not in column_operators: 6611 break 6612 op = column_operators[op_token] 6613 self._advance() 6614 6615 if op_token in cast_column_operators: 6616 field = self._parse_dcolon() 6617 if not field: 6618 
self.raise_error("Expected type") 6619 elif op and self._curr: 6620 field = self._parse_column_reference() or self._parse_bitwise() 6621 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 6622 field = self._parse_column_ops(field) 6623 else: 6624 field = self._parse_field(any_token=True, anonymous_func=True) 6625 6626 # Function calls can be qualified, e.g., x.y.FOO() 6627 # This converts the final AST to a series of Dots leading to the function call 6628 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 6629 if isinstance(field, (exp.Func, exp.Window)) and this: 6630 this = this.transform( 6631 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 6632 ) 6633 6634 if op: 6635 this = op(self, this, field) 6636 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 6637 this = self.expression( 6638 exp.Column( 6639 this=field, 6640 table=this.this, 6641 db=this.args.get("table"), 6642 catalog=this.args.get("db"), 6643 ), 6644 comments=this.comments, 6645 ) 6646 elif isinstance(field, exp.Window): 6647 # Move the exp.Dot's to the window's function 6648 window_func = self.expression(exp.Dot(this=this, expression=field.this)) 6649 field.set("this", window_func) 6650 this = field 6651 else: 6652 this = self.expression(exp.Dot(this=this, expression=field)) 6653 6654 if field and field.comments: 6655 t.cast(exp.Expr, this).add_comments(field.pop_comments()) 6656 6657 this = self._parse_bracket(this) 6658 6659 return this 6660 6661 def _parse_paren(self) -> exp.Expr | None: 6662 if not self._match(TokenType.L_PAREN): 6663 return None 6664 6665 comments = self._prev_comments 6666 query = self._parse_select() 6667 6668 if query: 6669 expressions = [query] 6670 else: 6671 expressions = self._parse_expressions() 6672 6673 this = seq_get(expressions, 0) 6674 6675 if not this and self._match(TokenType.R_PAREN, advance=False): 6676 this = 
self.expression(exp.Tuple()) 6677 elif isinstance(this, exp.UNWRAPPED_QUERIES): 6678 this = self._parse_subquery(this=this, parse_alias=False) 6679 elif isinstance(this, (exp.Subquery, exp.Values)): 6680 this = self._parse_subquery( 6681 this=self._parse_query_modifiers(self._parse_set_operations(this)), 6682 parse_alias=False, 6683 ) 6684 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 6685 this = self.expression(exp.Tuple(expressions=expressions)) 6686 else: 6687 this = self.expression(exp.Paren(this=this)) 6688 6689 if this: 6690 this.add_comments(comments) 6691 6692 self._match_r_paren(expression=this) 6693 6694 if isinstance(this, exp.Paren) and isinstance(this.this, exp.AggFunc): 6695 return self._parse_window(this) 6696 6697 return this 6698 6699 def _parse_primary(self) -> exp.Expr | None: 6700 if self._match_set(self.PRIMARY_PARSERS): 6701 token_type = self._prev.token_type 6702 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 6703 6704 if token_type == TokenType.STRING: 6705 expressions = [primary] 6706 while self._match(TokenType.STRING): 6707 expressions.append(exp.Literal.string(self._prev.text)) 6708 6709 if len(expressions) > 1: 6710 return self.expression( 6711 exp.Concat(expressions=expressions, coalesce=self.dialect.CONCAT_COALESCE) 6712 ) 6713 6714 return primary 6715 6716 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 6717 return exp.Literal.number(f"0.{self._prev.text}") 6718 6719 return self._parse_paren() 6720 6721 def _parse_field( 6722 self, 6723 any_token: bool = False, 6724 tokens: t.Collection[TokenType] | None = None, 6725 anonymous_func: bool = False, 6726 ) -> exp.Expr | None: 6727 if anonymous_func: 6728 field = ( 6729 self._parse_function(anonymous=anonymous_func, any_token=any_token) 6730 or self._parse_primary() 6731 ) 6732 else: 6733 field = self._parse_primary() or self._parse_function( 6734 anonymous=anonymous_func, any_token=any_token 6735 ) 6736 return field or 
self._parse_id_var(any_token=any_token, tokens=tokens) 6737 6738 def _parse_function( 6739 self, 6740 functions: dict[str, t.Callable] | None = None, 6741 anonymous: bool = False, 6742 optional_parens: bool = True, 6743 any_token: bool = False, 6744 ) -> exp.Expr | None: 6745 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 6746 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 6747 fn_syntax = False 6748 if ( 6749 self._match(TokenType.L_BRACE, advance=False) 6750 and self._next 6751 and self._next.text.upper() == "FN" 6752 ): 6753 self._advance(2) 6754 fn_syntax = True 6755 6756 func = self._parse_function_call( 6757 functions=functions, 6758 anonymous=anonymous, 6759 optional_parens=optional_parens, 6760 any_token=any_token, 6761 ) 6762 6763 if fn_syntax: 6764 self._match(TokenType.R_BRACE) 6765 6766 return func 6767 6768 def _parse_function_args(self, alias: bool = False) -> list[exp.Expr]: 6769 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 6770 6771 def _parse_function_call( 6772 self, 6773 functions: dict[str, t.Callable] | None = None, 6774 anonymous: bool = False, 6775 optional_parens: bool = True, 6776 any_token: bool = False, 6777 ) -> exp.Expr | None: 6778 if not self._curr: 6779 return None 6780 6781 comments = self._curr.comments 6782 prev = self._prev 6783 token = self._curr 6784 token_type = self._curr.token_type 6785 this: str | exp.Expr = self._curr.text 6786 upper = self._curr.text.upper() 6787 6788 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 6789 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 6790 self._advance() 6791 return self._parse_window(parser(self)) 6792 6793 if self._next.token_type != TokenType.L_PAREN: 6794 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 6795 self._advance() 6796 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]()) 6797 6798 return None 6799 6800 if any_token: 6801 if token_type 
in self.RESERVED_TOKENS: 6802 return None 6803 elif token_type not in self.FUNC_TOKENS: 6804 return None 6805 6806 self._advance(2) 6807 6808 parser = self.FUNCTION_PARSERS.get(upper) 6809 if parser and not anonymous: 6810 result = parser(self) 6811 else: 6812 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 6813 6814 if subquery_predicate: 6815 expr = None 6816 if self._curr.token_type in self.SUBQUERY_TOKENS: 6817 expr = self._parse_select() 6818 self._match_r_paren() 6819 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 6820 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 6821 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 6822 self._advance(-1) 6823 expr = self._parse_bitwise() 6824 6825 if expr: 6826 return self.expression(subquery_predicate(this=expr), comments=comments) 6827 6828 if functions is None: 6829 functions = self.FUNCTIONS 6830 6831 function = functions.get(upper) 6832 known_function = function and not anonymous 6833 6834 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 6835 args = self._parse_function_args(alias) 6836 6837 post_func_comments = self._curr.comments if self._curr else None 6838 if known_function and post_func_comments: 6839 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 6840 # call we'll construct it as exp.Anonymous, even if it's "known" 6841 if any( 6842 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 6843 for comment in post_func_comments 6844 ): 6845 known_function = False 6846 6847 if alias and known_function: 6848 args = self._kv_to_prop_eq(args) 6849 6850 if known_function: 6851 func_builder = t.cast(t.Callable, function) 6852 6853 # mypyc compiled functions don't have __code__, so we use 6854 # try/except to check if func_builder accepts 'dialect'. 
6855 try: 6856 func = func_builder(args) 6857 except TypeError: 6858 func = func_builder(args, dialect=self.dialect) 6859 6860 func = self.validate_expression(func, args) 6861 if self.dialect.PRESERVE_ORIGINAL_NAMES: 6862 func.meta["name"] = this 6863 6864 result = func 6865 else: 6866 if token_type == TokenType.IDENTIFIER: 6867 this = exp.Identifier(this=this, quoted=True).update_positions(token) 6868 6869 result = self.expression(exp.Anonymous(this=this, expressions=args)) 6870 6871 result = result.update_positions(token) 6872 6873 if isinstance(result, exp.Expr): 6874 result.add_comments(comments) 6875 6876 if parser: 6877 self._match(TokenType.R_PAREN, expression=result) 6878 else: 6879 self._match_r_paren(result) 6880 return self._parse_window(result) 6881 6882 def _to_prop_eq(self, expression: exp.Expr, index: int) -> exp.Expr: 6883 return expression 6884 6885 def _kv_to_prop_eq( 6886 self, expressions: list[exp.Expr], parse_map: bool = False 6887 ) -> list[exp.Expr]: 6888 transformed = [] 6889 6890 for index, e in enumerate(expressions): 6891 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 6892 if isinstance(e, exp.Alias): 6893 e = self.expression(exp.PropertyEQ(this=e.args.get("alias"), expression=e.this)) 6894 6895 if not isinstance(e, exp.PropertyEQ): 6896 e = self.expression( 6897 exp.PropertyEQ( 6898 this=e.this if parse_map else exp.to_identifier(e.this.name), 6899 expression=e.expression, 6900 ) 6901 ) 6902 6903 if isinstance(e.this, exp.Column): 6904 e.this.replace(e.this.this) 6905 else: 6906 e = self._to_prop_eq(e, index) 6907 6908 transformed.append(e) 6909 6910 return transformed 6911 6912 def _parse_function_properties(self) -> exp.Properties | None: 6913 return self._parse_properties() 6914 6915 def _parse_user_defined_function_expression(self) -> exp.Expr | None: 6916 return self._parse_statement() 6917 6918 def _parse_function_parameter(self) -> exp.Expr | None: 6919 return self._parse_column_def(this=self._parse_id_var(), 
computed_column=False)

    def _parse_user_defined_function(self, kind: TokenType | None = None) -> exp.Expr | None:
        """Parse a user-defined function reference: a (possibly schema-qualified)
        name, optionally followed by a wrapped parameter list.

        Args:
            kind: token kind consumed by the caller (available for dialect
                overrides); unused in this base implementation.
        """
        this = self._parse_table_parts(schema=True)

        # A bare name with no parameter list is returned unchanged.
        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction(this=this, expressions=expressions, wrapped=True)
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a string introducer (e.g. ``_utf8'abc'``); falls back to a
        plain identifier when no literal follows the introducer token."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer(this=token.text, expression=literal), token)

        return self._identifier_expression(token)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter, optionally qualified as ``kind.name``."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            # Qualified form: the leading identifier becomes the parameter's kind
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter(this=this, kind=kind))

    def _parse_lambda_arg(self) -> exp.Expr | None:
        # Overridable hook: lambda arguments are plain identifiers by default
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> exp.Expr | None:
        """Parse a lambda (``(a, b) -> expr`` or ``x -> expr``) if one follows;
        otherwise fall back to a regular select/expression argument."""
        next_token_type = self._next.token_type

        # Fast path: simple atom (column, literal, null, bool) followed by , or )
        if (
            next_token_type in self.LAMBDA_ARG_TERMINATORS
            and (atom := self._parse_atom()) is not None
        ):
            return atom

        # Remember the cursor so we can backtrack when no lambda arrow follows
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                list[t.Optional[exp.Expr]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
            elif self._match_set(self.LAMBDAS):
                # Parenthesized argument list: "(a, b) -> ..."
                return self.LAMBDAS[self._prev.token_type](self, expressions)
            else:
                self._retreat(index)
        elif self.TYPED_LAMBDA_ARGS or next_token_type in self.LAMBDAS:
            expressions = [self._parse_lambda_arg()]

            if self._match_set(self.LAMBDAS):
                # Single-argument lambda: "x -> ..."
                return self.LAMBDAS[self._prev.token_type](self, expressions)

            # Not a lambda after all -- rewind and parse a plain expression
            self._retreat(index)

        this: exp.Expr | None

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct(expressions=self._parse_csv(self._parse_disjunction))
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        # Aggregate-style modifiers that may trail a function argument
        return self._parse_limit(
            self._parse_respect_or_ignore_nulls(
                self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
            )
        )

    def _parse_schema(self, this: exp.Expr | None = None) -> exp.Expr | None:
        """Parse a wrapped column/constraint list into ``exp.Schema`` (e.g. the
        ``(a INT, b TEXT)`` part of DDL); returns ``this`` unchanged when no
        parenthesized list follows."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema(this=this, expressions=args))

    def _parse_field_def(self) -> exp.Expr | None:
        # A field definition is a column definition whose name may be any token
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(
        self, this: exp.Expr | None, computed_column: bool = True
    ) -> exp.Expr | None:
        """Parse the type and constraint list following a column name."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        if not computed_column:
            self._match(TokenType.ALIAS)

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef(this=this, ordinality=True))

        constraints: list[exp.Expr] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted =
self._prev.text.upper() == "MATERIALIZED" 7037 constraint_kind = exp.ComputedColumnConstraint( 7038 this=self._parse_disjunction(), 7039 persisted=persisted or self._match_text_seq("PERSISTED"), 7040 data_type=exp.Var(this="AUTO") 7041 if self._match_text_seq("AUTO") 7042 else self._parse_types(), 7043 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 7044 ) 7045 constraints.append(self.expression(exp.ColumnConstraint(kind=constraint_kind))) 7046 elif not kind and self._match_set({TokenType.IN, TokenType.OUT}, advance=False): 7047 in_out_constraint = self.expression( 7048 exp.InOutColumnConstraint( 7049 input_=self._match(TokenType.IN), output=self._match(TokenType.OUT) 7050 ) 7051 ) 7052 constraints.append(in_out_constraint) 7053 kind = self._parse_types() 7054 elif ( 7055 kind 7056 and self._match(TokenType.ALIAS, advance=False) 7057 and ( 7058 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 7059 or self._next.token_type == TokenType.L_PAREN 7060 ) 7061 ): 7062 self._advance() 7063 constraints.append( 7064 self.expression( 7065 exp.ColumnConstraint( 7066 kind=exp.ComputedColumnConstraint( 7067 this=self._parse_disjunction(), 7068 persisted=self._match_texts(("STORED", "VIRTUAL")) 7069 and self._prev.text.upper() == "STORED", 7070 ) 7071 ) 7072 ) 7073 ) 7074 7075 while True: 7076 constraint = self._parse_column_constraint() 7077 if not constraint: 7078 break 7079 constraints.append(constraint) 7080 7081 if not kind and not constraints: 7082 return this 7083 7084 return self.expression(exp.ColumnDef(this=this, kind=kind, constraints=constraints)) 7085 7086 def _parse_auto_increment( 7087 self, 7088 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 7089 start = None 7090 increment = None 7091 order = None 7092 7093 if self._match(TokenType.L_PAREN, advance=False): 7094 args = self._parse_wrapped_csv(self._parse_bitwise) 7095 start = seq_get(args, 0) 7096 increment = seq_get(args, 1) 7097 elif self._match_text_seq("START"): 7098 
start = self._parse_bitwise() 7099 self._match_text_seq("INCREMENT") 7100 increment = self._parse_bitwise() 7101 if self._match_text_seq("ORDER"): 7102 order = True 7103 elif self._match_text_seq("NOORDER"): 7104 order = False 7105 7106 if start and increment: 7107 return exp.GeneratedAsIdentityColumnConstraint( 7108 start=start, increment=increment, this=False, order=order 7109 ) 7110 7111 return exp.AutoIncrementColumnConstraint() 7112 7113 def _parse_check_constraint(self) -> exp.CheckColumnConstraint | None: 7114 if not self._match(TokenType.L_PAREN, advance=False): 7115 return None 7116 7117 return self.expression( 7118 exp.CheckColumnConstraint( 7119 this=self._parse_wrapped(self._parse_assignment), 7120 enforced=self._match_text_seq("ENFORCED"), 7121 ) 7122 ) 7123 7124 def _parse_auto_property(self) -> exp.AutoRefreshProperty | None: 7125 if not self._match_text_seq("REFRESH"): 7126 self._retreat(self._index - 1) 7127 return None 7128 return self.expression(exp.AutoRefreshProperty(this=self._parse_var(upper=True))) 7129 7130 def _parse_compress(self) -> exp.CompressColumnConstraint: 7131 if self._match(TokenType.L_PAREN, advance=False): 7132 return self.expression( 7133 exp.CompressColumnConstraint(this=self._parse_wrapped_csv(self._parse_bitwise)) 7134 ) 7135 7136 return self.expression(exp.CompressColumnConstraint(this=self._parse_bitwise())) 7137 7138 def _parse_generated_as_identity( 7139 self, 7140 ) -> ( 7141 exp.GeneratedAsIdentityColumnConstraint 7142 | exp.ComputedColumnConstraint 7143 | exp.GeneratedAsRowColumnConstraint 7144 ): 7145 if self._match_text_seq("BY", "DEFAULT"): 7146 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 7147 this = self.expression( 7148 exp.GeneratedAsIdentityColumnConstraint(this=False, on_null=on_null) 7149 ) 7150 else: 7151 self._match_text_seq("ALWAYS") 7152 this = self.expression(exp.GeneratedAsIdentityColumnConstraint(this=True)) 7153 7154 self._match(TokenType.ALIAS) 7155 7156 if self._match_text_seq("ROW"): 
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint(start=start, hidden=hidden))

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            # Optional identity/sequence options inside parentheses
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>): a computed expression, not an identity
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare "(start, increment)" shorthand
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] <expr> column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint(this=self._parse_bitwise()))

    def _parse_not_constraint(self) -> exp.Expr | None:
        """Parse constraints introduced by NOT (the NOT token was already
        consumed by the caller); returns None and rewinds if none match."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint())
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint(not_=True))
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint())

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> exp.Expr | None:
        """Parse one (optionally named) column constraint, dispatching on
        CONSTRAINT_PARSERS; returns just the name when no known constraint
        keyword follows."""
        this = self._parse_id_var() if self._match(TokenType.CONSTRAINT) else None

        # Avoid mistaking "WITH <procedure option>" for a WITH-style constraint
        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            constraint = self.CONSTRAINT_PARSERS[self._prev.text.upper()](self)
            if not constraint:
                # The sub-parser declined; unconsume the constraint keyword
                self._retreat(self._index - 1)
                return None

            return self.expression(exp.ColumnConstraint(this=this, kind=constraint))

        return this

    def _parse_constraint(self) -> exp.Expr | None:
        """Parse a CONSTRAINT <name> <constraints...> clause, or fall back to a
        single unnamed schema-level constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint(this=self._parse_id_var(), expressions=self._parse_unnamed_constraints())
        )

    def _parse_unnamed_constraints(self) -> list[exp.Expr]:
        """Collect consecutive unnamed constraints (or constraint-like function
        calls) until neither parses."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Collection[str] | None = None
    ) -> exp.Expr | None:
        """Parse a single unnamed constraint whose keyword is in ``constraints``
        (defaults to CONSTRAINT_PARSERS); rewinds and returns None on failure."""
        index = self._index

        # Quoted identifiers are never constraint keywords
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint_key = self._prev.text.upper()
        if constraint_key not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint_key}.")

        result = self.CONSTRAINT_PARSERS[constraint_key](self)
        if not result:
            self._retreat(index)

        return result

    def _parse_unique_key(self) -> exp.Expr | None:
        """Parse the optional name of a UNIQUE key, refusing unquoted tokens
        that are themselves constraint keywords."""
        if (
            self._curr
            and self._curr.token_type != TokenType.IDENTIFIER
            and self._curr.text.upper() in self.CONSTRAINT_PARSERS
        ):
            return None

        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse a UNIQUE [KEY | INDEX] constraint with its optional column
        schema, index type, ON CONFLICT clause and trailing options."""
        self._match_texts(("KEY", "INDEX"))
        return self.expression(
            exp.UniqueColumnConstraint(
                nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
                this=self._parse_schema(self._parse_unique_key()),
                index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
                on_conflict=self._parse_on_conflict(),
                options=self._parse_key_constraint_options(),
            )
        )

    def _parse_key_constraint_options(self) -> list[str]:
        """Collect trailing key-constraint options as plain strings: any number
        of ``ON <token> <action>`` clauses plus keywords listed in
        KEY_CONSTRAINT_OPTIONS, stopping at the first unmatched token."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON (e.g. DELETE/UPDATE) is kept verbatim
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> exp.Reference | None:
        """Parse a REFERENCES <table> clause with trailing options; when
        ``match`` is True, returns None unless REFERENCES is present."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions: list | None = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference(this=this, expressions=expressions, options=options))

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse a FOREIGN KEY constraint: optional wrapped column list, a
        REFERENCES clause, and any number of ON DELETE/UPDATE actions."""
        expressions = (
            self._parse_wrapped_id_vars()
if not self._match(TokenType.REFERENCES, advance=False) 7332 else None 7333 ) 7334 reference = self._parse_references() 7335 on_options = {} 7336 7337 while self._match(TokenType.ON): 7338 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 7339 self.raise_error("Expected DELETE or UPDATE") 7340 7341 kind = self._prev.text.lower() 7342 7343 if self._match_text_seq("NO", "ACTION"): 7344 action = "NO ACTION" 7345 elif self._match(TokenType.SET): 7346 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 7347 action = "SET " + self._prev.text.upper() 7348 else: 7349 self._advance() 7350 action = self._prev.text.upper() 7351 7352 on_options[kind] = action 7353 7354 return self.expression( 7355 exp.ForeignKey( 7356 expressions=expressions, 7357 reference=reference, 7358 options=self._parse_key_constraint_options(), 7359 **on_options, 7360 ) 7361 ) 7362 7363 def _parse_primary_key_part(self) -> exp.Expr | None: 7364 return self._parse_field() 7365 7366 def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint | None: 7367 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 7368 self._retreat(self._index - 1) 7369 return None 7370 7371 id_vars = self._parse_wrapped_id_vars() 7372 return self.expression( 7373 exp.PeriodForSystemTimeConstraint( 7374 this=seq_get(id_vars, 0), expression=seq_get(id_vars, 1) 7375 ) 7376 ) 7377 7378 def _parse_primary_key( 7379 self, 7380 wrapped_optional: bool = False, 7381 in_props: bool = False, 7382 named_primary_key: bool = False, 7383 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 7384 desc = ( 7385 self._prev.token_type == TokenType.DESC 7386 if self._match_set((TokenType.ASC, TokenType.DESC)) 7387 else None 7388 ) 7389 7390 this = None 7391 if ( 7392 named_primary_key 7393 and self._curr.text.upper() not in self.CONSTRAINT_PARSERS 7394 and self._next 7395 and self._next.token_type == TokenType.L_PAREN 7396 ): 7397 this = self._parse_id_var() 7398 7399 if not in_props and not 
self._match(TokenType.L_PAREN, advance=False): 7400 return self.expression( 7401 exp.PrimaryKeyColumnConstraint( 7402 desc=desc, options=self._parse_key_constraint_options() 7403 ) 7404 ) 7405 7406 expressions = self._parse_wrapped_csv( 7407 self._parse_primary_key_part, optional=wrapped_optional 7408 ) 7409 7410 return self.expression( 7411 exp.PrimaryKey( 7412 this=this, 7413 expressions=expressions, 7414 include=self._parse_index_params(), 7415 options=self._parse_key_constraint_options(), 7416 ) 7417 ) 7418 7419 def _parse_bracket_key_value(self, is_map: bool = False) -> exp.Expr | None: 7420 return self._parse_slice(self._parse_alias(self._parse_disjunction(), explicit=True)) 7421 7422 def _parse_odbc_datetime_literal(self) -> exp.Expr: 7423 """ 7424 Parses a datetime column in ODBC format. We parse the column into the corresponding 7425 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 7426 same as we did for `DATE('yyyy-mm-dd')`. 7427 7428 Reference: 7429 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 7430 """ 7431 self._match(TokenType.VAR) 7432 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 7433 expression = self.expression(exp_class(this=self._parse_string())) 7434 if not self._match(TokenType.R_BRACE): 7435 self.raise_error("Expected }") 7436 return expression 7437 7438 def _parse_bracket(self, this: exp.Expr | None = None) -> exp.Expr | None: 7439 if not self._match_set(self.BRACKETS): 7440 return this 7441 7442 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 7443 map_token = seq_get(self._tokens, self._index - 2) 7444 parse_map = map_token is not None and map_token.text.upper() == "MAP" 7445 else: 7446 parse_map = False 7447 7448 bracket_kind = self._prev.token_type 7449 if ( 7450 bracket_kind == TokenType.L_BRACE 7451 and self._curr 7452 and self._curr.token_type == TokenType.VAR 7453 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 7454 ): 
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            # "{k: v, ...}" literals become structs of PropertyEQ expressions
            this = self.expression(
                exp.Struct(
                    expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map)
                )
            )
        elif not this:
            # Standalone "[...]" is an array literal
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            # Otherwise this is a subscript; normalize indices per dialect offset
            expressions = apply_index_offset(
                this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
            )
            this = self.expression(
                exp.Bracket(this=this, expressions=expressions), comments=this.pop_comments()
            )

        self._add_comments(this)
        # Chained subscripts, e.g. x[1][2]
        return self._parse_bracket(this)

    def _parse_slice(self, this: exp.Expr | None) -> exp.Expr | None:
        """Parse the tail of a slice subscript ``[start:end[:step]]``; returns
        ``this`` unchanged when no colon follows."""
        if not self._match(TokenType.COLON):
            return this

        if self._match_pair(TokenType.DASH, TokenType.COLON, advance=False):
            # A bare "-" before ":" is shorthand for index -1
            self._advance()
            end: exp.Expr | None = -exp.Literal.number("1")
        else:
            end = self._parse_assignment()

        step = self._parse_unary() if self._match(TokenType.COLON) else None

        return self.expression(exp.Slice(this=this, expression=end, step=step))

    def _parse_case(self) -> exp.Expr | None:
        if self._match(TokenType.DOT,
advance=False): 7511 # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake 7512 self._retreat(self._index - 1) 7513 return None 7514 7515 ifs = [] 7516 default = None 7517 7518 comments = self._prev_comments 7519 expression = self._parse_disjunction() 7520 7521 while self._match(TokenType.WHEN): 7522 this = self._parse_disjunction() 7523 self._match(TokenType.THEN) 7524 then = self._parse_disjunction() 7525 ifs.append(self.expression(exp.If(this=this, true=then))) 7526 7527 if self._match(TokenType.ELSE): 7528 default = self._parse_disjunction() 7529 7530 if not self._match(TokenType.END): 7531 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 7532 default = exp.column("interval") 7533 else: 7534 self.raise_error("Expected END after CASE", self._prev) 7535 7536 return self.expression( 7537 exp.Case(this=expression, ifs=ifs, default=default), comments=comments 7538 ) 7539 7540 def _parse_if(self) -> exp.Expr | None: 7541 if self._match(TokenType.L_PAREN): 7542 args = self._parse_csv( 7543 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 7544 ) 7545 this = self.validate_expression(exp.If.from_arg_list(args), args) 7546 self._match_r_paren() 7547 else: 7548 index = self._index - 1 7549 7550 if self.NO_PAREN_IF_COMMANDS and index == 0: 7551 return self._parse_as_command(self._prev) 7552 7553 condition = self._parse_disjunction() 7554 7555 if not condition: 7556 self._retreat(index) 7557 return None 7558 7559 self._match(TokenType.THEN) 7560 true = self._parse_disjunction() 7561 false = self._parse_disjunction() if self._match(TokenType.ELSE) else None 7562 self._match(TokenType.END) 7563 this = self.expression(exp.If(this=condition, true=true, false=false)) 7564 7565 return this 7566 7567 def _parse_next_value_for(self) -> exp.Expr | None: 7568 if not self._match_text_seq("VALUE", "FOR"): 7569 self._retreat(self._index - 1) 7570 return None 7571 7572 return self.expression( 7573 
exp.NextValueFor( 7574 this=self._parse_column(), 7575 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 7576 ) 7577 ) 7578 7579 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 7580 this = self._parse_function() or self._parse_var_or_string(upper=True) 7581 7582 if self._match(TokenType.FROM): 7583 return self.expression(exp.Extract(this=this, expression=self._parse_bitwise())) 7584 7585 if not self._match(TokenType.COMMA): 7586 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 7587 7588 return self.expression(exp.Extract(this=this, expression=self._parse_bitwise())) 7589 7590 def _parse_gap_fill(self) -> exp.GapFill: 7591 self._match(TokenType.TABLE) 7592 this = self._parse_table() 7593 7594 self._match(TokenType.COMMA) 7595 args = [this, *self._parse_csv(self._parse_lambda)] 7596 7597 gap_fill = exp.GapFill.from_arg_list(args) 7598 return self.validate_expression(gap_fill, args) 7599 7600 def _parse_char(self) -> exp.Chr: 7601 return self.expression( 7602 exp.Chr( 7603 expressions=self._parse_csv(self._parse_assignment), 7604 charset=self._match(TokenType.USING) and self._parse_var(), 7605 ) 7606 ) 7607 7608 def _parse_cast(self, strict: bool, safe: bool | None = None) -> exp.Expr: 7609 this = self._parse_assignment() 7610 7611 if not self._match(TokenType.ALIAS): 7612 if self._match(TokenType.COMMA): 7613 return self.expression(exp.CastToStrType(this=this, to=self._parse_string())) 7614 7615 self.raise_error("Expected AS after CAST") 7616 7617 fmt = None 7618 to = self._parse_types() 7619 7620 default = None 7621 if self._match(TokenType.DEFAULT): 7622 default = self._parse_bitwise() 7623 self._match_text_seq("ON", "CONVERSION", "ERROR") 7624 7625 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 7626 fmt_string = self._parse_wrapped(self._parse_string, optional=True) 7627 fmt = self._parse_at_time_zone(fmt_string) 7628 7629 if not to: 7630 to = exp.DType.UNKNOWN.into_expr() 7631 if to.this in 
exp.DataType.TEMPORAL_TYPES: 7632 this = self.expression( 7633 (exp.StrToDate if to.this == exp.DType.DATE else exp.StrToTime)( 7634 this=this, 7635 format=exp.Literal.string( 7636 format_time( 7637 fmt_string.this if fmt_string else "", 7638 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 7639 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 7640 ) 7641 ), 7642 safe=safe, 7643 ) 7644 ) 7645 7646 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 7647 this.set("zone", fmt.args["zone"]) 7648 return this 7649 elif not to: 7650 self.raise_error("Expected TYPE after CAST") 7651 elif isinstance(to, exp.Identifier): 7652 to = exp.DataType.build(to.name, dialect=self.dialect, udt=True) 7653 elif to.this == exp.DType.CHAR and self._match(TokenType.CHARACTER_SET): 7654 to = exp.DType.CHARACTER_SET.into_expr(kind=self._parse_var_or_string()) 7655 7656 return self.build_cast( 7657 strict=strict, 7658 this=this, 7659 to=to, 7660 format=fmt, 7661 safe=safe, 7662 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 7663 default=default, 7664 ) 7665 7666 def _parse_string_agg(self) -> exp.GroupConcat: 7667 if self._match(TokenType.DISTINCT): 7668 args: list[exp.Expr | None] = [ 7669 self.expression(exp.Distinct(expressions=[self._parse_disjunction()])) 7670 ] 7671 if self._match(TokenType.COMMA): 7672 args.extend(self._parse_csv(self._parse_disjunction)) 7673 else: 7674 args = self._parse_csv(self._parse_disjunction) # type: ignore 7675 7676 if self._match_text_seq("ON", "OVERFLOW"): 7677 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 7678 if self._match_text_seq("ERROR"): 7679 on_overflow: exp.Expr | None = exp.var("ERROR") 7680 else: 7681 self._match_text_seq("TRUNCATE") 7682 on_overflow = self.expression( 7683 exp.OverflowTruncateBehavior( 7684 this=self._parse_string(), 7685 with_count=( 7686 self._match_text_seq("WITH", "COUNT") 7687 or not self._match_text_seq("WITHOUT", "COUNT") 7688 ), 
7689 ) 7690 ) 7691 else: 7692 on_overflow = None 7693 7694 index = self._index 7695 if not self._match(TokenType.R_PAREN) and args: 7696 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 7697 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 7698 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 7699 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 7700 return self.expression(exp.GroupConcat(this=args[0], separator=seq_get(args, 1))) 7701 7702 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 7703 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 7704 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 7705 if not self._match_text_seq("WITHIN", "GROUP"): 7706 self._retreat(index) 7707 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 7708 7709 # The corresponding match_r_paren will be called in parse_function (caller) 7710 self._match_l_paren() 7711 7712 return self.expression( 7713 exp.GroupConcat( 7714 this=self._parse_order(this=seq_get(args, 0)), 7715 separator=seq_get(args, 1), 7716 on_overflow=on_overflow, 7717 ) 7718 ) 7719 7720 def _parse_convert(self, strict: bool, safe: bool | None = None) -> exp.Expr | None: 7721 this = self._parse_bitwise() 7722 7723 if self._match(TokenType.USING): 7724 to: exp.Expr | None = exp.DType.CHARACTER_SET.into_expr( 7725 kind=self._parse_var(tokens={TokenType.BINARY}) 7726 ) 7727 elif self._match(TokenType.COMMA): 7728 to = self._parse_types() 7729 else: 7730 to = None 7731 7732 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 7733 7734 def _parse_xml_element(self) -> exp.XMLElement: 7735 if self._match_text_seq("EVALNAME"): 7736 evalname = True 7737 this = 
self._parse_bitwise()
        else:
            evalname = None
            self._match_text_seq("NAME")
            this = self._parse_id_var()

        return self.expression(
            exp.XMLElement(
                this=this,
                expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_bitwise),
                evalname=evalname,
            )
        )

    def _parse_xml_table(self) -> exp.XMLTable:
        """Parse the body of an XMLTABLE(...) call: optional XMLNAMESPACES,
        the row-pattern string, PASSING expressions, and a COLUMNS clause."""
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable(
                this=this, namespaces=namespaces, passing=passing, columns=columns, by_ref=by_ref
            )
        )

    def _parse_xml_namespace(self) -> list[exp.XMLNamespace]:
        """Parse a comma-separated XMLNAMESPACES list; each entry is either
        DEFAULT <uri> or an aliased uri string."""
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace(this=uri)))
            if not self._match(TokenType.COMMA):
                break

        return namespaces

    def _parse_decode(self) -> exp.Decode | exp.DecodeCase | None:
        """Parse DECODE arguments: fewer than three arguments is a character
        decode (value, charset); three or more is the CASE-like form."""
        args = self._parse_csv(self._parse_disjunction)

        if len(args) < 3:
            return self.expression(exp.Decode(this=seq_get(args, 0), charset=seq_get(args, 1)))

        return self.expression(exp.DecodeCase(expressions=args))

    def _parse_json_key_value(self) -> exp.JSONKeyValue | None:
        """Parse a JSON [KEY] <key> <sep> [VALUE] <value> pair; returns None
        when neither side is present."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue(this=key, expression=value))

    def _parse_format_json(self, this: exp.Expr | None) -> exp.Expr | None:
        """Wrap ``this`` in exp.FormatJson when followed by FORMAT JSON."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson(this=this))

    def _parse_on_condition(self) -> exp.OnCondition | None:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(exp.OnCondition(empty=empty, error=error, null=null))

    def _parse_on_handling(self, on: str, *values: str) -> str | exp.Expr | None:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            # No "ON <on>" after the default expression -- rewind
            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: t.Literal[False]) -> exp.JSONObject: ...
7851 7852 @t.overload 7853 def _parse_json_object(self, agg: t.Literal[True]) -> exp.JSONObjectAgg: ... 7854 7855 def _parse_json_object(self, agg=False): 7856 star = self._parse_star() 7857 expressions = ( 7858 [star] 7859 if star 7860 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 7861 ) 7862 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 7863 7864 unique_keys = None 7865 if self._match_text_seq("WITH", "UNIQUE"): 7866 unique_keys = True 7867 elif self._match_text_seq("WITHOUT", "UNIQUE"): 7868 unique_keys = False 7869 7870 self._match_text_seq("KEYS") 7871 7872 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 7873 self._parse_type() 7874 ) 7875 encoding = self._match_text_seq("ENCODING") and self._parse_var() 7876 7877 return self.expression( 7878 (exp.JSONObjectAgg if agg else exp.JSONObject)( 7879 expressions=expressions, 7880 null_handling=null_handling, 7881 unique_keys=unique_keys, 7882 return_type=return_type, 7883 encoding=encoding, 7884 ) 7885 ) 7886 7887 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 7888 def _parse_json_column_def(self) -> exp.JSONColumnDef: 7889 if not self._match_text_seq("NESTED"): 7890 this = self._parse_id_var() 7891 ordinality = self._match_pair(TokenType.FOR, TokenType.ORDINALITY) 7892 kind = self._parse_types(allow_identifiers=False) 7893 nested = None 7894 else: 7895 this = None 7896 ordinality = None 7897 kind = None 7898 nested = True 7899 7900 path = self._match_text_seq("PATH") and self._parse_string() 7901 nested_schema = nested and self._parse_json_schema() 7902 7903 return self.expression( 7904 exp.JSONColumnDef( 7905 this=this, kind=kind, path=path, nested_schema=nested_schema, ordinality=ordinality 7906 ) 7907 ) 7908 7909 def _parse_json_schema(self) -> exp.JSONSchema: 7910 self._match_text_seq("COLUMNS") 7911 return self.expression( 7912 exp.JSONSchema( 7913 
                expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True)
            )
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(<json>, <path> ... COLUMNS(...)) including ON ERROR / ON EMPTY handling."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MATCH(...) AGAINST(...) full-text search syntax (MySQL/SingleStore)."""
        if self._match_text_seq("TABLE"):
            # parse SingleStore MATCH(TABLE ...) syntax
            # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/
            expressions = []
            table = self._parse_table()
            if table:
                expressions = [table]
        else:
            expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        # Optional search modifier; NATURAL LANGUAGE MODE may be combined with QUERY EXPANSION
        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst(this=this, expressions=expressions, modifier=modifier)
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL OPENJSON(<json> [, <path>]) [WITH (<column defs>)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One column inside the WITH (...) clause: name, type, optional path, optional AS JSON
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef(this=this, kind=kind, path=path, as_json=as_json)
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON(this=this, path=path, expressions=expressions))

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION(needle IN haystack) or POSITION(a, b[, pos]); `haystack_first` flips argument order."""
        args = self._parse_csv(self._parse_bitwise)

        # POSITION(substr IN string) form
        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition(this=self._parse_bitwise(), substr=seq_get(args, 0))
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join hint such as BROADCAST(t1, t2) into a JoinHint node."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # (despite being undocumented, the reverse order also works)
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(list[t.Optional[exp.Expr]], self._parse_csv(self._parse_bitwise))

        start, length = None, None

        # Consume FROM/FOR clauses in either order
        while self._curr:
            if self._match(TokenType.FROM):
                start = self._parse_bitwise()
            elif self._match(TokenType.FOR):
                if not start:
                    # FOR without FROM implies starting at position 1
                    start = exp.Literal.number(1)
                length = self._parse_bitwise()
            else:
                break
8025 8026 if start: 8027 args.append(start) 8028 if length: 8029 args.append(length) 8030 8031 return self.validate_expression(exp.Substring.from_arg_list(args), args) 8032 8033 def _parse_trim(self) -> exp.Trim: 8034 # https://www.w3resource.com/sql/character-functions/trim.php 8035 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 8036 8037 position = None 8038 collation = None 8039 expression = None 8040 8041 if self._match_texts(self.TRIM_TYPES): 8042 position = self._prev.text.upper() 8043 8044 this = self._parse_bitwise() 8045 if self._match_set((TokenType.FROM, TokenType.COMMA)): 8046 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 8047 expression = self._parse_bitwise() 8048 8049 if invert_order: 8050 this, expression = expression, this 8051 8052 if self._match(TokenType.COLLATE): 8053 collation = self._parse_bitwise() 8054 8055 return self.expression( 8056 exp.Trim(this=this, position=position, expression=expression, collation=collation) 8057 ) 8058 8059 def _parse_window_clause(self) -> list[exp.Expr] | None: 8060 return self._parse_csv(self._parse_named_window) if self._match(TokenType.WINDOW) else None 8061 8062 def _parse_named_window(self) -> exp.Expr | None: 8063 return self._parse_window(self._parse_id_var(), alias=True) 8064 8065 def _parse_respect_or_ignore_nulls(self, this: exp.Expr | None) -> exp.Expr | None: 8066 if self._curr.token_type == TokenType.VAR: 8067 if self._match_text_seq("IGNORE", "NULLS"): 8068 return self.expression(exp.IgnoreNulls(this=this)) 8069 if self._match_text_seq("RESPECT", "NULLS"): 8070 return self.expression(exp.RespectNulls(this=this)) 8071 return this 8072 8073 def _parse_having_max(self, this: exp.Expr | None) -> exp.Expr | None: 8074 if self._match(TokenType.HAVING): 8075 self._match_texts(("MAX", "MIN")) 8076 max = self._prev.text.upper() != "MIN" 8077 return self.expression( 8078 exp.HavingMax(this=this, expression=self._parse_column(), max=max) 8079 ) 8080 8081 
        return this

    def _parse_window(self, this: exp.Expr | None, alias: bool = False) -> exp.Expr | None:
        """Parse the window suffix of a function call: WITHIN GROUP, FILTER, OVER (...) and
        named-window definitions (when `alias` is True)."""
        func = this
        comments = func.comments if isinstance(func, exp.Expr) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup(this=this, expression=order))

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter(this=this, expression=self._parse_where(skip_where_token=True))
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the inner IGNORE/RESPECT NULLS so it wraps the whole aggregate
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__(this=this))

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
8191 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 8192 return self._parse_window(window, alias=alias) 8193 8194 return window 8195 8196 def _parse_partition_and_order( 8197 self, 8198 ) -> tuple[list[exp.Expr], exp.Expr | None]: 8199 return self._parse_partition_by(), self._parse_order() 8200 8201 def _parse_window_spec(self) -> dict[str, str | exp.Expr | None]: 8202 self._match(TokenType.BETWEEN) 8203 8204 return { 8205 "value": ( 8206 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 8207 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 8208 or self._parse_bitwise() 8209 ), 8210 "side": self._prev.text if self._match_texts(self.WINDOW_SIDES) else None, 8211 } 8212 8213 def _parse_alias(self, this: exp.Expr | None, explicit: bool = False) -> exp.Expr | None: 8214 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 8215 # so this section tries to parse the clause version and if it fails, it treats the token 8216 # as an identifier (alias) 8217 if self._can_parse_limit_or_offset(): 8218 return this 8219 8220 any_token = self._match(TokenType.ALIAS) 8221 comments = self._prev_comments 8222 8223 if explicit and not any_token: 8224 return this 8225 8226 if self._match(TokenType.L_PAREN): 8227 aliases = self.expression( 8228 exp.Aliases( 8229 this=this, expressions=self._parse_csv(lambda: self._parse_id_var(any_token)) 8230 ), 8231 comments=comments, 8232 ) 8233 self._match_r_paren(aliases) 8234 return aliases 8235 8236 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 8237 self.STRING_ALIASES and self._parse_string_as_identifier() 8238 ) 8239 8240 if alias: 8241 comments.extend(alias.pop_comments()) 8242 this = self.expression(exp.Alias(this=this, alias=alias), comments=comments) 8243 column = this.this 8244 8245 # Moves the comment next to the alias in `expr /* comment */ AS alias` 8246 if not this.comments and column and column.comments: 8247 this.comments = 
column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Collection[TokenType] | None = None,
    ) -> exp.Expr | None:
        """Parse an identifier or, failing that, any token usable as one (per `tokens`/ID_VAR_TOKENS)."""
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self._identifier_expression(quoted=quoted)

        return expression

    def _parse_string(self) -> exp.Expr | None:
        """Parse a string literal (dialect-specific via STRING_PARSERS) or a placeholder."""
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> exp.Identifier | None:
        """Parse a string literal and convert it into a quoted identifier."""
        if not self._match(TokenType.STRING):
            return None
        output = exp.to_identifier(self._prev.text, quoted=True)
        output.update_positions(self._prev)
        return output

    def _parse_number(self) -> exp.Expr | None:
        """Parse a numeric literal (dialect-specific via NUMERIC_PARSERS) or a placeholder."""
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> exp.Expr | None:
        """Parse a quoted identifier token or a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self._identifier_expression(quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Collection[TokenType] | None = None,
        upper: bool = False,
    ) -> exp.Expr | None:
        """Parse a VAR token (or any token / one of `tokens`) into a Var, optionally upper-cased."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var(this=self._prev.text.upper() if upper else self._prev.text)
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> Token | None:
        """Consume and return the current token unless it is reserved (and not ignored)."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> exp.Expr | None:
        """Parse a string literal, falling back to a Var from any token."""
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> exp.Expr | None:
        """Parse a primary expression, falling back to a Var from any token."""
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> exp.Expr | None:
        """Parse NULL (or UNKNOWN) into a Null expression, or a placeholder."""
        if self._match_set((TokenType.NULL, TokenType.UNKNOWN)):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> exp.Expr | None:
        """Parse TRUE/FALSE into a Boolean expression, or a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> exp.Expr | None:
        """Parse the * token into a Star expression, or a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter marker's name (identifier, primary, or var)."""
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter(this=this))

    def _parse_placeholder(self) -> exp.Expr | None:
        """Parse a placeholder token (e.g. ?, :name); retreat if the dialect parser rejects it."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> list[exp.Expr] | None:
        """Parse star modifiers such as EXCEPT/REPLACE/RENAME following a star."""
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_alias(self._parse_disjunction(), explicit=True)
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable[[], T | None], sep: TokenType = TokenType.COMMA
    ) -> list[T]:
        """Parse a `sep`-separated list of items produced by `parse_method`, dropping Nones."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach any comments that trailed the separator to the previous item
            if isinstance(parse_result, exp.Expr):
                self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_wrapped_id_vars(self, optional: bool = False) -> list[exp.Expr]:
        """Parse a parenthesized, comma-separated list of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self,
        parse_method: t.Callable[[], T | None],
        sep: TokenType = TokenType.COMMA,
        optional: bool = False,
    ) -> list[T]:
        """Parse a parenthesized `sep`-separated list; parens required unless `optional`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable[[], T], optional: bool = False) -> T:
        """Run `parse_method` inside (...); raise if the opening paren is missing and not `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> list[exp.Expr]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> exp.Expr | None:
        """Parse an (optionally aliased) expression with set operations, or a SELECT statement."""
        return (
            self._parse_set_operations(
                self._parse_alias(self._parse_assignment(), explicit=True)
                if alias
                else self._parse_assignment()
            )
            or self._parse_select()
        )

    def _parse_ddl_select(self) -> exp.Expr | None:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS SELECT)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START TRANSACTION with optional transaction kind and modes."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        # Collect comma-separated transaction modes, each a run of VAR/NOT tokens
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR) or self._match(TokenType.NOT):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction(this=this, modes=modes))

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK with optional TO SAVEPOINT and AND [NO] CHAIN clauses."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback(savepoint=savepoint))

        return self.expression(exp.Commit(chain=chain))

    def _parse_refresh(self) -> exp.Refresh | exp.Command:
        """Parse REFRESH [TABLE | MATERIALIZED VIEW] <target>; fall back to a generic Command."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match_text_seq("MATERIALIZED", "VIEW"):
            kind = "MATERIALIZED VIEW"
        else:
            kind = ""

        this = self._parse_string() or self._parse_table()
        if not kind and not isinstance(this, exp.Literal):
            return self._parse_as_command(self._prev)

        return self.expression(exp.Refresh(this=this, kind=kind))

    def _parse_column_def_with_exists(self) -> exp.ColumnDef | None:
        """Parse [COLUMN] [IF NOT EXISTS] <column def>; retreat and return None on failure."""
        start = self._index
        self._match(TokenType.COLUMN)

        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if not isinstance(expression, exp.ColumnDef):
            self._retreat(start)
            return None

        expression.set("exists", exists_column)

        return expression

    def _parse_add_column(self) -> exp.ColumnDef | None:
        """Parse ADD [COLUMN] <def> [FIRST | AFTER <col>] (the keyword ADD was already consumed)."""
        if not self._prev.text.upper() == "ADD":
            return None

        expression = self._parse_column_def_with_exists()
        if not expression:
            return None

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition(this=self._parse_column(), position=position)
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> exp.Drop | exp.Command | None:
        """Parse a DROP [COLUMN] action inside ALTER TABLE, defaulting the kind to COLUMN."""
        drop = self._parse_drop() if self._match(TokenType.DROP) else None
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: bool | None = None) -> exp.DropPartition:
        """Parse a DROP PARTITION action, possibly with IF EXISTS already consumed."""
        return self.expression(
            exp.DropPartition(expressions=self._parse_csv(self._parse_partition), exists=exists)
        )

    def _parse_alter_table_add(self) -> list[exp.Expr]:
        """Parse the ADD ... action list of ALTER TABLE (constraints, columns, partitions)."""

        def _parse_add_alteration() -> exp.Expr | None:
            # One ADD item: constraint, column def, or partition
            self._match_text_seq("ADD")
            if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
                return self.expression(
                    exp.AddConstraint(expressions=self._parse_csv(self._parse_constraint))
                )

            column_def = self._parse_add_column()
            if isinstance(column_def, exp.ColumnDef):
                return column_def

            exists = self._parse_exists(not_=True)
            if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
                return self.expression(
                    exp.AddPartition(
                        exists=exists,
                        this=self._parse_field(any_token=True),
                        location=self._match_text_seq("LOCATION", advance=False)
                        and self._parse_property(),
                    )
                )

            return None

        if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
            not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
            or self._match_text_seq("COLUMNS")
        ):
            schema = self._parse_schema()

            return (
                ensure_list(schema)
                if schema
                else self._parse_csv(self._parse_column_def_with_exists)
            )

        return self._parse_csv(_parse_add_alteration)

    def _parse_alter_table_alter(self) -> exp.Expr | None:
        """Parse an ALTER [COLUMN] action: drop/set default, comment, nullability, visibility, or type."""
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn(this=column, drop=True))
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn(this=column, default=self._parse_disjunction()))
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn(this=column, comment=self._parse_string()))
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(exp.AlterColumn(this=column, drop=True, allow_null=True))
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(exp.AlterColumn(this=column, allow_null=False))

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn(this=column, visible="VISIBLE"))
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn(this=column, visible="INVISIBLE"))

        # Fallback: [SET DATA] [TYPE] <dtype> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn(
                this=column,
                dtype=self._parse_types(),
                collate=self._match(TokenType.COLLATE) and self._parse_term(),
                using=self._match(TokenType.USING) and self._parse_disjunction(),
            )
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        """Parse Redshift ALTER DISTSTYLE { ALL | EVEN | AUTO | KEY DISTKEY <col> }."""
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle(this=exp.var(self._prev.text.upper())))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle(this=self._parse_column()))

    def _parse_alter_sortkey(self, compound: bool | None = None) -> exp.AlterSortKey:
        """Parse Redshift ALTER [COMPOUND] SORTKEY (cols) | AUTO | NONE."""
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey(expressions=self._parse_wrapped_id_vars(), compound=compound)
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey(this=exp.var(self._prev.text.upper()), compound=compound)
        )

    def _parse_alter_table_drop(self) -> list[exp.Expr]:
        """Parse a DROP action list: either DROP PARTITION(s) or DROP COLUMN(s)."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop — rewind and parse as column drops
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.AlterRename | exp.RenameColumn | None:
        """Parse RENAME [COLUMN] <old> TO <new> or RENAME TO <table>."""
        if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN:
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or not to or new_column is None:
                return None

            return self.expression(exp.RenameColumn(this=old_column, to=new_column, exists=exists))

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename(this=self._parse_table(schema=True)))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse the many dialect-specific forms of ALTER TABLE ... SET ...."""
        alter_set = self.expression(exp.AlterSet())

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            # Hive-style SET [SERDE ...] (properties)
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            properties = self._parse_wrapped(self._parse_properties, optional=True)
            alter_set.set("expressions", [properties])

        return alter_set

    def _parse_alter_session(self) -> exp.AlterSession:
        """Parse ALTER SESSION SET/UNSET statements."""
        if self._match(TokenType.SET):
            expressions = self._parse_csv(lambda: self._parse_set_item_assignment())
            return self.expression(exp.AlterSession(expressions=expressions, unset=False))

        self._match_text_seq("UNSET")
        expressions = self._parse_csv(
            lambda: self.expression(exp.SetItem(this=self._parse_id_var(any_token=True)))
        )
        return self.expression(exp.AlterSession(expressions=expressions, unset=True))

    def _parse_alter(self) -> exp.Alter | exp.Command:
        """Parse an ALTER <alterable> statement, falling back to a raw Command when unsupported."""
        start = self._prev

        iceberg = self._match_text_seq("ICEBERG")

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)
        # ALTER ICEBERG only makes sense for tables
        if iceberg and alter_token.token_type != TokenType.TABLE:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")

        if alter_token.token_type == TokenType.SESSION:
            # ALTER SESSION has no target object
            this = None
            check = None
            cluster = None
        else:
            this = self._parse_table(schema=True, parse_partition=self.ALTER_TABLE_PARTITIONS)
            check = self._match_text_seq("WITH", "CHECK")
            cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)
            cascade = self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE")

            # Only build an Alter node if all tokens were consumed; otherwise fall through
            if not self._curr and actions:
                return self.expression(
                    exp.Alter(
                        this=this,
                        kind=alter_token.text.upper(),
                        exists=exists,
                        actions=actions,
                        only=only,
                        options=options,
                        cluster=cluster,
                        not_valid=not_valid,
                        check=check,
                        cascade=cascade,
                        iceberg=iceberg,
                    )
                )

        return self._parse_as_command(start)

    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        """Parse the many dialect flavors of ANALYZE (DuckDB, Presto, MySQL, StarRocks, ...)."""
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze())

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: exp.Expr | None = None
        inner_expression: exp.Expr | None = None

        kind = self._curr.text.upper() if self._curr else None

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expr keywords before fallback to parse table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze(
                kind=kind,
                this=this,
                mode=mode,
                partition=partition,
                properties=properties,
                expression=inner_expression,
                options=options,
            )
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        """Parse the COMPUTE/ESTIMATE STATISTICS clause of ANALYZE TABLE."""
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample(
                        sample=sample,
                        kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                    )
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics(kind=kind, option=option, this=this, expressions=expressions)
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        """Parse Oracle's ANALYZE ... VALIDATE REF UPDATE / VALIDATE STRUCTURE clauses."""
        kind = None
        this = None
        expression: exp.Expr | None = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate(kind=kind, this=this, expression=expression))

    def _parse_analyze_columns(self) -> exp.AnalyzeColumns | None:
        """Parse an ANALYZE ... <kind> COLUMNS clause."""
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns(this=f"{this} {self._prev.text.upper()}"))
        return None

    def _parse_analyze_delete(self) -> exp.AnalyzeDelete | None:
        """Parse ANALYZE ... DELETE [SYSTEM] STATISTICS."""
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete(kind=kind))
        return None

    def _parse_analyze_list(self) -> exp.AnalyzeListChainedRows | None:
        """Parse ANALYZE ... LIST CHAINED ROWS [INTO ...]."""
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows(expression=self._parse_into()))
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        """Parse MySQL/StarRocks ANALYZE ... UPDATE/DROP HISTOGRAM ON <cols> [WITH ...]."""
        this = self._prev.text.upper()
        expression: exp.Expr | None = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith(expressions=with_expressions))

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
): 8891 update_options = self._prev.text.upper() 8892 self._advance() 8893 elif self._match_text_seq("USING", "DATA"): 8894 expression = self.expression(exp.UsingData(this=self._parse_string())) 8895 8896 return self.expression( 8897 exp.AnalyzeHistogram( 8898 this=this, 8899 expressions=expressions, 8900 expression=expression, 8901 update_options=update_options, 8902 ) 8903 ) 8904 8905 def _parse_merge(self) -> exp.Merge: 8906 self._match(TokenType.INTO) 8907 target = self._parse_table() 8908 8909 if target and self._match(TokenType.ALIAS, advance=False): 8910 target.set("alias", self._parse_table_alias()) 8911 8912 self._match(TokenType.USING) 8913 using = self._parse_table() 8914 8915 return self.expression( 8916 exp.Merge( 8917 this=target, 8918 using=using, 8919 on=self._match(TokenType.ON) and self._parse_disjunction(), 8920 using_cond=self._match(TokenType.USING) and self._parse_using_identifiers(), 8921 whens=self._parse_when_matched(), 8922 returning=self._parse_returning(), 8923 ) 8924 ) 8925 8926 def _parse_when_matched(self) -> exp.Whens: 8927 whens = [] 8928 8929 while self._match(TokenType.WHEN): 8930 matched = not self._match(TokenType.NOT) 8931 self._match_text_seq("MATCHED") 8932 source = ( 8933 False 8934 if self._match_text_seq("BY", "TARGET") 8935 else self._match_text_seq("BY", "SOURCE") 8936 ) 8937 condition = self._parse_disjunction() if self._match(TokenType.AND) else None 8938 8939 self._match(TokenType.THEN) 8940 8941 if self._match(TokenType.INSERT): 8942 this = self._parse_star() 8943 if this: 8944 then: exp.Expr | None = self.expression(exp.Insert(this=this)) 8945 else: 8946 then = self.expression( 8947 exp.Insert( 8948 this=exp.var("ROW") 8949 if self._match_text_seq("ROW") 8950 else self._parse_value(values=False), 8951 expression=self._match_text_seq("VALUES") and self._parse_value(), 8952 where=self._parse_where(), 8953 ) 8954 ) 8955 elif self._match(TokenType.UPDATE): 8956 expressions = self._parse_star() 8957 if expressions: 8958 
then = self.expression(exp.Update(expressions=expressions)) 8959 else: 8960 then = self.expression( 8961 exp.Update( 8962 expressions=self._match(TokenType.SET) 8963 and self._parse_csv(self._parse_equality), 8964 where=self._parse_where(), 8965 ) 8966 ) 8967 elif self._match(TokenType.DELETE): 8968 then = self.expression(exp.Var(this=self._prev.text)) 8969 else: 8970 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 8971 8972 whens.append( 8973 self.expression( 8974 exp.When(matched=matched, source=source, condition=condition, then=then) 8975 ) 8976 ) 8977 return self.expression(exp.Whens(expressions=whens)) 8978 8979 def _parse_show(self) -> exp.Expr | None: 8980 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 8981 if parser: 8982 return parser(self) 8983 return self._parse_as_command(self._prev) 8984 8985 def _parse_set_item_assignment(self, kind: str | None = None) -> exp.Expr | None: 8986 index = self._index 8987 8988 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 8989 return self._parse_set_transaction(global_=kind == "GLOBAL") 8990 8991 left = self._parse_primary() or self._parse_column() 8992 assignment_delimiter = self._match_texts(self.SET_ASSIGNMENT_DELIMITERS) 8993 8994 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 8995 self._retreat(index) 8996 return None 8997 8998 right = self._parse_statement() or self._parse_id_var() 8999 if isinstance(right, (exp.Column, exp.Identifier)): 9000 right = exp.var(right.name) 9001 9002 this = self.expression(exp.EQ(this=left, expression=right)) 9003 return self.expression(exp.SetItem(this=this, kind=kind)) 9004 9005 def _parse_set_transaction(self, global_: bool = False) -> exp.Expr: 9006 self._match_text_seq("TRANSACTION") 9007 characteristics = self._parse_csv( 9008 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 9009 ) 9010 return self.expression( 9011 exp.SetItem(expressions=characteristics, 
kind="TRANSACTION", global_=global_) 9012 ) 9013 9014 def _parse_set_item(self) -> exp.Expr | None: 9015 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 9016 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 9017 9018 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 9019 index = self._index 9020 set_ = self.expression( 9021 exp.Set(expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag) 9022 ) 9023 9024 if self._curr: 9025 self._retreat(index) 9026 return self._parse_as_command(self._prev) 9027 9028 return set_ 9029 9030 def _parse_var_from_options( 9031 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 9032 ) -> exp.Var | None: 9033 start = self._curr 9034 if not start: 9035 return None 9036 9037 option = start.text.upper() 9038 continuations = options.get(option) 9039 9040 index = self._index 9041 self._advance() 9042 for keywords in continuations or []: 9043 if isinstance(keywords, str): 9044 keywords = (keywords,) 9045 9046 if self._match_text_seq(*keywords): 9047 option = f"{option} {' '.join(keywords)}" 9048 break 9049 else: 9050 if continuations or continuations is None: 9051 if raise_unmatched: 9052 self.raise_error(f"Unknown option {option}") 9053 9054 self._retreat(index) 9055 return None 9056 9057 return exp.var(option) 9058 9059 def _parse_as_command(self, start: Token) -> exp.Command: 9060 while self._curr: 9061 self._advance() 9062 text = self._find_sql(start, self._prev) 9063 size = len(start.text) 9064 self._warn_unsupported() 9065 return exp.Command(this=text[:size], expression=text[size:]) 9066 9067 def _parse_dict_property(self, this: str) -> exp.DictProperty: 9068 settings = [] 9069 9070 self._match_l_paren() 9071 kind = self._parse_id_var() 9072 9073 if self._match(TokenType.L_PAREN): 9074 while True: 9075 key = self._parse_id_var() 9076 value = self._parse_function() or self._parse_primary_or_var() 9077 if not key and value is None: 9078 
break 9079 settings.append(self.expression(exp.DictSubProperty(this=key, value=value))) 9080 self._match(TokenType.R_PAREN) 9081 9082 self._match_r_paren() 9083 9084 return self.expression( 9085 exp.DictProperty(this=this, kind=kind.this if kind else None, settings=settings) 9086 ) 9087 9088 def _parse_dict_range(self, this: str) -> exp.DictRange: 9089 self._match_l_paren() 9090 has_min = self._match_text_seq("MIN") 9091 if has_min: 9092 min = self._parse_var() or self._parse_primary() 9093 self._match_text_seq("MAX") 9094 max = self._parse_var() or self._parse_primary() 9095 else: 9096 max = self._parse_var() or self._parse_primary() 9097 min = exp.Literal.number(0) 9098 self._match_r_paren() 9099 return self.expression(exp.DictRange(this=this, min=min, max=max)) 9100 9101 def _parse_comprehension(self, this: exp.Expr | None) -> exp.Comprehension | None: 9102 index = self._index 9103 expression = self._parse_column() 9104 position = self._match(TokenType.COMMA) and self._parse_column() 9105 9106 if not self._match(TokenType.IN): 9107 self._retreat(index - 1) 9108 return None 9109 iterator = self._parse_column() 9110 condition = self._parse_disjunction() if self._match_text_seq("IF") else None 9111 return self.expression( 9112 exp.Comprehension( 9113 this=this, 9114 expression=expression, 9115 position=position, 9116 iterator=iterator, 9117 condition=condition, 9118 ) 9119 ) 9120 9121 def _parse_heredoc(self) -> exp.Heredoc | None: 9122 if self._match(TokenType.HEREDOC_STRING): 9123 return self.expression(exp.Heredoc(this=self._prev.text)) 9124 9125 if not self._match_text_seq("$"): 9126 return None 9127 9128 tags = ["$"] 9129 tag_text = None 9130 9131 if self._is_connected(): 9132 self._advance() 9133 tags.append(self._prev.text.upper()) 9134 else: 9135 self.raise_error("No closing $ found") 9136 9137 if tags[-1] != "$": 9138 if self._is_connected() and self._match_text_seq("$"): 9139 tag_text = tags[-1] 9140 tags.append("$") 9141 else: 9142 self.raise_error("No 
closing $ found") 9143 9144 heredoc_start = self._curr 9145 9146 while self._curr: 9147 if self._match_text_seq(*tags, advance=False): 9148 this = self._find_sql(heredoc_start, self._prev) 9149 self._advance(len(tags)) 9150 return self.expression(exp.Heredoc(this=this, tag=tag_text)) 9151 9152 self._advance() 9153 9154 self.raise_error(f"No closing {''.join(tags)} found") 9155 return None 9156 9157 def _find_parser(self, parsers: dict[str, t.Callable], trie: dict) -> t.Callable | None: 9158 if not self._curr: 9159 return None 9160 9161 index = self._index 9162 this = [] 9163 while True: 9164 # The current token might be multiple words 9165 curr = self._curr.text.upper() 9166 key = curr.split(" ") 9167 this.append(curr) 9168 9169 self._advance() 9170 result, trie = in_trie(trie, key) 9171 if result == TrieResult.FAILED: 9172 break 9173 9174 if result == TrieResult.EXISTS: 9175 subparser = parsers[" ".join(this)] 9176 return subparser 9177 9178 self._retreat(index) 9179 return None 9180 9181 def _match_l_paren(self, expression: exp.Expr | None = None) -> None: 9182 if not self._match(TokenType.L_PAREN, expression=expression): 9183 self.raise_error("Expecting (") 9184 9185 def _match_r_paren(self, expression: exp.Expr | None = None) -> None: 9186 if not self._match(TokenType.R_PAREN, expression=expression): 9187 self.raise_error("Expecting )") 9188 9189 def _replace_lambda( 9190 self, node: exp.Expr | None, expressions: list[exp.Expr] 9191 ) -> exp.Expr | None: 9192 if not node: 9193 return node 9194 9195 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 9196 9197 for column in node.find_all(exp.Column): 9198 typ = lambda_types.get(column.parts[0].name) 9199 if typ is not None: 9200 dot_or_id = column.to_dot() if column.table else column.this 9201 9202 if typ: 9203 dot_or_id = self.expression(exp.Cast(this=dot_or_id, to=typ)) 9204 9205 parent = column.parent 9206 9207 while isinstance(parent, exp.Dot): 9208 if not isinstance(parent.parent, 
exp.Dot): 9209 parent.replace(dot_or_id) 9210 break 9211 parent = parent.parent 9212 else: 9213 if column is node: 9214 node = dot_or_id 9215 else: 9216 column.replace(dot_or_id) 9217 return node 9218 9219 def _parse_truncate_table(self) -> exp.TruncateTable | None | exp.Expr: 9220 start = self._prev 9221 9222 # Not to be confused with TRUNCATE(number, decimals) function call 9223 if self._match(TokenType.L_PAREN): 9224 self._retreat(self._index - 2) 9225 return self._parse_function() 9226 9227 # Clickhouse supports TRUNCATE DATABASE as well 9228 is_database = self._match(TokenType.DATABASE) 9229 9230 self._match(TokenType.TABLE) 9231 9232 exists = self._parse_exists(not_=False) 9233 9234 expressions = self._parse_csv( 9235 lambda: self._parse_table(schema=True, is_db_reference=is_database) 9236 ) 9237 9238 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 9239 9240 if self._match_text_seq("RESTART", "IDENTITY"): 9241 identity = "RESTART" 9242 elif self._match_text_seq("CONTINUE", "IDENTITY"): 9243 identity = "CONTINUE" 9244 else: 9245 identity = None 9246 9247 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 9248 option = self._prev.text 9249 else: 9250 option = None 9251 9252 partition = self._parse_partition() 9253 9254 # Fallback case 9255 if self._curr: 9256 return self._parse_as_command(start) 9257 9258 return self.expression( 9259 exp.TruncateTable( 9260 expressions=expressions, 9261 is_database=is_database, 9262 exists=exists, 9263 cluster=cluster, 9264 identity=identity, 9265 option=option, 9266 partition=partition, 9267 ) 9268 ) 9269 9270 def _parse_with_operator(self) -> exp.Expr | None: 9271 this = self._parse_ordered(self._parse_opclass) 9272 9273 if not self._match(TokenType.WITH): 9274 return this 9275 9276 op = self._parse_var(any_token=True, tokens=self.RESERVED_TOKENS) 9277 9278 return self.expression(exp.WithOperator(this=this, op=op)) 9279 9280 def _parse_wrapped_options(self) -> list[exp.Expr]: 
9281 self._match(TokenType.EQ) 9282 self._match(TokenType.L_PAREN) 9283 9284 opts: list[exp.Expr] = [] 9285 option: exp.Expr | list[exp.Expr] | None 9286 while self._curr and not self._match(TokenType.R_PAREN): 9287 if self._match_text_seq("FORMAT_NAME", "="): 9288 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 9289 option = self._parse_format_name() 9290 else: 9291 option = self._parse_property() 9292 9293 if option is None: 9294 self.raise_error("Unable to parse option") 9295 break 9296 9297 opts.extend(ensure_list(option)) 9298 9299 return opts 9300 9301 def _parse_copy_parameters(self) -> list[exp.CopyParameter]: 9302 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 9303 9304 options = [] 9305 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 9306 option = self._parse_var(any_token=True) 9307 prev = self._prev.text.upper() 9308 9309 # Different dialects might separate options and values by white space, "=" and "AS" 9310 self._match(TokenType.EQ) 9311 self._match(TokenType.ALIAS) 9312 9313 param = self.expression(exp.CopyParameter(this=option)) 9314 9315 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 9316 TokenType.L_PAREN, advance=False 9317 ): 9318 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 9319 param.set("expressions", self._parse_wrapped_options()) 9320 elif prev == "FILE_FORMAT": 9321 # T-SQL's external file format case 9322 param.set("expression", self._parse_field()) 9323 elif ( 9324 prev == "FORMAT" 9325 and self._prev.token_type == TokenType.ALIAS 9326 and self._match_texts(("AVRO", "JSON")) 9327 ): 9328 param.set("this", exp.var(f"FORMAT AS {self._prev.text.upper()}")) 9329 param.set("expression", self._parse_field()) 9330 else: 9331 param.set("expression", self._parse_unquoted_field() or self._parse_bracket()) 9332 9333 options.append(param) 9334 9335 if sep: 9336 self._match(sep) 9337 9338 return options 9339 9340 def _parse_credentials(self) -> 
exp.Credentials | None: 9341 expr = self.expression(exp.Credentials()) 9342 9343 if self._match_text_seq("STORAGE_INTEGRATION", "="): 9344 expr.set("storage", self._parse_field()) 9345 if self._match_text_seq("CREDENTIALS"): 9346 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 9347 creds = ( 9348 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 9349 ) 9350 expr.set("credentials", creds) 9351 if self._match_text_seq("ENCRYPTION"): 9352 expr.set("encryption", self._parse_wrapped_options()) 9353 if self._match_text_seq("IAM_ROLE"): 9354 expr.set( 9355 "iam_role", 9356 exp.var(self._prev.text) if self._match(TokenType.DEFAULT) else self._parse_field(), 9357 ) 9358 if self._match_text_seq("REGION"): 9359 expr.set("region", self._parse_field()) 9360 9361 return expr 9362 9363 def _parse_file_location(self) -> exp.Expr | None: 9364 return self._parse_field() 9365 9366 def _parse_copy(self) -> exp.Copy | exp.Command: 9367 start = self._prev 9368 9369 self._match(TokenType.INTO) 9370 9371 this = ( 9372 self._parse_select(nested=True, parse_subquery_alias=False) 9373 if self._match(TokenType.L_PAREN, advance=False) 9374 else self._parse_table(schema=True) 9375 ) 9376 9377 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 9378 9379 files = self._parse_csv(self._parse_file_location) 9380 if self._match(TokenType.EQ, advance=False): 9381 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 9382 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 9383 # list via `_parse_wrapped(..)` below. 
9384 self._advance(-1) 9385 files = [] 9386 9387 credentials = self._parse_credentials() 9388 9389 self._match_text_seq("WITH") 9390 9391 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 9392 9393 # Fallback case 9394 if self._curr: 9395 return self._parse_as_command(start) 9396 9397 return self.expression( 9398 exp.Copy(this=this, kind=kind, credentials=credentials, files=files, params=params) 9399 ) 9400 9401 def _parse_normalize(self) -> exp.Normalize: 9402 return self.expression( 9403 exp.Normalize( 9404 this=self._parse_bitwise(), form=self._match(TokenType.COMMA) and self._parse_var() 9405 ) 9406 ) 9407 9408 def _parse_ceil_floor(self, expr_type: type[TCeilFloor]) -> TCeilFloor: 9409 args = self._parse_csv(lambda: self._parse_lambda()) 9410 9411 this = seq_get(args, 0) 9412 decimals = seq_get(args, 1) 9413 9414 return expr_type( 9415 this=this, 9416 decimals=decimals, 9417 to=self._parse_var() if self._match_text_seq("TO") else None, 9418 ) 9419 9420 def _parse_star_ops(self) -> exp.Expr | None: 9421 star_token = self._prev 9422 9423 if self._match_text_seq("COLUMNS", "(", advance=False): 9424 this = self._parse_function() 9425 if isinstance(this, exp.Columns): 9426 this.set("unpack", True) 9427 return this 9428 9429 return self.expression( 9430 exp.Star( 9431 except_=self._parse_star_op("EXCEPT", "EXCLUDE"), 9432 replace=self._parse_star_op("REPLACE"), 9433 rename=self._parse_star_op("RENAME"), 9434 ) 9435 ).update_positions(star_token) 9436 9437 def _parse_grant_privilege(self) -> exp.GrantPrivilege | None: 9438 privilege_parts = [] 9439 9440 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 9441 # (end of privilege list) or L_PAREN (start of column list) are met 9442 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 9443 privilege_parts.append(self._curr.text.upper()) 9444 self._advance() 9445 9446 this = exp.var(" ".join(privilege_parts)) 9447 expressions = ( 9448 
self._parse_wrapped_csv(self._parse_column) 9449 if self._match(TokenType.L_PAREN, advance=False) 9450 else None 9451 ) 9452 9453 return self.expression(exp.GrantPrivilege(this=this, expressions=expressions)) 9454 9455 def _parse_grant_principal(self) -> exp.GrantPrincipal | None: 9456 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 9457 principal = self._parse_id_var() 9458 9459 if not principal: 9460 return None 9461 9462 return self.expression(exp.GrantPrincipal(this=principal, kind=kind)) 9463 9464 def _parse_grant_revoke_common( 9465 self, 9466 ) -> tuple[list | None, str | None, exp.Expr | None]: 9467 privileges = self._parse_csv(self._parse_grant_privilege) 9468 9469 self._match(TokenType.ON) 9470 kind = self._prev.text.upper() if self._match_set(self.CREATABLES) else None 9471 9472 # Attempt to parse the securable e.g. MySQL allows names 9473 # such as "foo.*", "*.*" which are not easily parseable yet 9474 securable = self._try_parse(self._parse_table_parts) 9475 9476 return privileges, kind, securable 9477 9478 def _parse_grant(self) -> exp.Grant | exp.Command: 9479 start = self._prev 9480 9481 privileges, kind, securable = self._parse_grant_revoke_common() 9482 9483 if not securable or not self._match_text_seq("TO"): 9484 return self._parse_as_command(start) 9485 9486 principals = self._parse_csv(self._parse_grant_principal) 9487 9488 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 9489 9490 if self._curr: 9491 return self._parse_as_command(start) 9492 9493 return self.expression( 9494 exp.Grant( 9495 privileges=privileges, 9496 kind=kind, 9497 securable=securable, 9498 principals=principals, 9499 grant_option=grant_option, 9500 ) 9501 ) 9502 9503 def _parse_revoke(self) -> exp.Revoke | exp.Command: 9504 start = self._prev 9505 9506 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 9507 9508 privileges, kind, securable = self._parse_grant_revoke_common() 9509 9510 if not securable or not 
self._match_text_seq("FROM"): 9511 return self._parse_as_command(start) 9512 9513 principals = self._parse_csv(self._parse_grant_principal) 9514 9515 cascade = None 9516 if self._match_texts(("CASCADE", "RESTRICT")): 9517 cascade = self._prev.text.upper() 9518 9519 if self._curr: 9520 return self._parse_as_command(start) 9521 9522 return self.expression( 9523 exp.Revoke( 9524 privileges=privileges, 9525 kind=kind, 9526 securable=securable, 9527 principals=principals, 9528 grant_option=grant_option, 9529 cascade=cascade, 9530 ) 9531 ) 9532 9533 def _parse_overlay(self) -> exp.Overlay: 9534 def _parse_overlay_arg(text: str) -> exp.Expr | None: 9535 return ( 9536 self._parse_bitwise() 9537 if self._match(TokenType.COMMA) or self._match_text_seq(text) 9538 else None 9539 ) 9540 9541 return self.expression( 9542 exp.Overlay( 9543 this=self._parse_bitwise(), 9544 expression=_parse_overlay_arg("PLACING"), 9545 from_=_parse_overlay_arg("FROM"), 9546 for_=_parse_overlay_arg("FOR"), 9547 ) 9548 ) 9549 9550 def _parse_format_name(self) -> exp.Property: 9551 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 9552 # for FILE_FORMAT = <format_name> 9553 return self.expression( 9554 exp.Property( 9555 this=exp.var("FORMAT_NAME"), value=self._parse_string() or self._parse_table_parts() 9556 ) 9557 ) 9558 9559 def _parse_max_min_by(self, expr_type: type[exp.AggFunc]) -> exp.AggFunc: 9560 args: list[exp.Expr] = [] 9561 9562 if self._match(TokenType.DISTINCT): 9563 args.append(self.expression(exp.Distinct(expressions=[self._parse_lambda()]))) 9564 self._match(TokenType.COMMA) 9565 9566 args.extend(self._parse_function_args()) 9567 9568 return self.expression( 9569 expr_type(this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)) 9570 ) 9571 9572 def _identifier_expression( 9573 self, token: Token | None = None, quoted: bool | None = None 9574 ) -> exp.Identifier: 9575 token = token or self._prev 9576 return 
self.expression(exp.Identifier(this=token.text, quoted=quoted), token) 9577 9578 def _build_pipe_cte( 9579 self, 9580 query: exp.Query, 9581 expressions: list[exp.Expr], 9582 alias_cte: exp.TableAlias | None = None, 9583 ) -> exp.Select: 9584 new_cte: str | exp.TableAlias | None 9585 if alias_cte: 9586 new_cte = alias_cte 9587 else: 9588 self._pipe_cte_counter += 1 9589 new_cte = f"__tmp{self._pipe_cte_counter}" 9590 9591 with_ = query.args.get("with_") 9592 ctes = with_.pop() if with_ else None 9593 9594 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 9595 if ctes: 9596 new_select.set("with_", ctes) 9597 9598 return new_select.with_(new_cte, as_=query, copy=False) 9599 9600 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 9601 select = self._parse_select(consume_pipe=False) 9602 if not select: 9603 return query 9604 9605 return self._build_pipe_cte( 9606 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 9607 ) 9608 9609 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 9610 limit = self._parse_limit() 9611 offset = self._parse_offset() 9612 if limit: 9613 curr_limit = query.args.get("limit", limit) 9614 if curr_limit.expression.to_py() >= limit.expression.to_py(): 9615 query.limit(limit, copy=False) 9616 if offset: 9617 curr_offset = query.args.get("offset") 9618 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 9619 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 9620 9621 return query 9622 9623 def _parse_pipe_syntax_aggregate_fields(self) -> exp.Expr | None: 9624 this = self._parse_disjunction() 9625 if self._match_text_seq("GROUP", "AND", advance=False): 9626 return this 9627 9628 this = self._parse_alias(this) 9629 9630 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 9631 return self._parse_ordered(lambda: this) 9632 9633 return this 9634 9635 def 
_parse_pipe_syntax_aggregate_group_order_by( 9636 self, query: exp.Select, group_by_exists: bool = True 9637 ) -> exp.Select: 9638 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 9639 aggregates_or_groups, orders = [], [] 9640 for element in expr: 9641 if isinstance(element, exp.Ordered): 9642 this = element.this 9643 if isinstance(this, exp.Alias): 9644 element.set("this", this.args["alias"]) 9645 orders.append(element) 9646 else: 9647 this = element 9648 aggregates_or_groups.append(this) 9649 9650 if group_by_exists: 9651 query.select(*aggregates_or_groups, copy=False).group_by( 9652 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 9653 copy=False, 9654 ) 9655 else: 9656 query.select(*aggregates_or_groups, append=False, copy=False) 9657 9658 if orders: 9659 return query.order_by(*orders, append=False, copy=False) 9660 9661 return query 9662 9663 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 9664 self._match_text_seq("AGGREGATE") 9665 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 9666 9667 if self._match(TokenType.GROUP_BY) or ( 9668 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 9669 ): 9670 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 9671 9672 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9673 9674 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> exp.Query | None: 9675 first_setop = self.parse_set_operation(this=query) 9676 if not first_setop: 9677 return None 9678 9679 def _parse_and_unwrap_query() -> exp.Expr | None: 9680 expr = self._parse_paren() 9681 return expr.assert_is(exp.Subquery).unnest() if expr else None 9682 9683 first_setop.this.pop() 9684 9685 setops = [ 9686 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 9687 *self._parse_csv(_parse_and_unwrap_query), 9688 ] 9689 9690 query = self._build_pipe_cte(query=query, 
expressions=[exp.Star()]) 9691 with_ = query.args.get("with_") 9692 ctes = with_.pop() if with_ else None 9693 9694 if isinstance(first_setop, exp.Union): 9695 query = query.union(*setops, copy=False, **first_setop.args) 9696 elif isinstance(first_setop, exp.Except): 9697 query = query.except_(*setops, copy=False, **first_setop.args) 9698 else: 9699 query = query.intersect(*setops, copy=False, **first_setop.args) 9700 9701 query.set("with_", ctes) 9702 9703 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9704 9705 def _parse_pipe_syntax_join(self, query: exp.Query) -> exp.Query | None: 9706 join = self._parse_join() 9707 if not join: 9708 return None 9709 9710 if isinstance(query, exp.Select): 9711 return query.join(join, copy=False) 9712 9713 return query 9714 9715 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 9716 pivots = self._parse_pivots() 9717 if not pivots: 9718 return query 9719 9720 from_ = query.args.get("from_") 9721 if from_: 9722 from_.this.set("pivots", pivots) 9723 9724 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9725 9726 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 9727 self._match_text_seq("EXTEND") 9728 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 9729 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9730 9731 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 9732 sample = self._parse_table_sample() 9733 9734 with_ = query.args.get("with_") 9735 if with_: 9736 with_.expressions[-1].this.set("sample", sample) 9737 else: 9738 query.set("sample", sample) 9739 9740 return query 9741 9742 def _parse_pipe_syntax_query(self, query: exp.Query) -> exp.Query | None: 9743 if isinstance(query, exp.Subquery): 9744 query = exp.select("*").from_(query, copy=False) 9745 9746 if not query.args.get("from_"): 9747 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 9748 9749 
        # NOTE(review): tail of a pipe-syntax parsing loop whose enclosing `def` is above
        # this view — presumably the PIPE_GT (`|>`) query transform driver; confirm upstream.
        while self._match(TokenType.PIPE_GT):
            start_index = self._index
            start_text = self._curr.text.upper()
            parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(start_text)
            if not parser:
                # The set operators (UNION, etc) and the JOIN operator have a few common starting
                # keywords, making it tricky to disambiguate them without lookahead. The approach
                # here is to try and parse a set operation and if that fails, then try to parse a
                # join operator. If that fails as well, then the operator is not supported.
                parsed_query = self._parse_pipe_syntax_set_operator(query)
                parsed_query = parsed_query or self._parse_pipe_syntax_join(query)
                if not parsed_query:
                    self._retreat(start_index)
                    # raise_error may not raise depending on the error level, so break explicitly
                    self.raise_error(f"Unsupported pipe syntax operator: '{start_text}'.")
                    break
                query = parsed_query
            else:
                query = parser(self, query)

        return query

    def _parse_declareitem(self) -> exp.DeclareItem | None:
        """Parse a single DECLARE item: variable name(s), optional type, optional default.

        Returns None when no identifier follows, signalling the caller to fall back.
        """
        # Optional noise words some dialects allow before the variable name
        self._match_texts(("VAR", "VARIABLE"))

        vars = self._parse_csv(self._parse_id_var)
        if not vars:
            return None

        self._match(TokenType.ALIAS)
        # TABLE variables get a full schema; everything else gets a plain type
        kind = self._parse_schema() if self._match(TokenType.TABLE) else self._parse_types()
        # Default value may be introduced by either DEFAULT or '='
        default = (
            self._match(TokenType.DEFAULT) or self._match(TokenType.EQ)
        ) and self._parse_bitwise()

        return self.expression(exp.DeclareItem(this=vars, kind=kind, default=default))

    def _parse_declare(self) -> exp.Declare | exp.Command:
        """Parse a DECLARE statement; fall back to an opaque Command on failure.

        If the item list cannot be parsed, or trailing tokens remain, the whole
        statement is preserved verbatim as an exp.Command.
        """
        start = self._prev
        replace = self._match_text_seq("OR", "REPLACE")
        expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem))

        if not expressions or self._curr:
            return self._parse_as_command(start)

        return self.expression(exp.Declare(expressions=expressions, replace=replace))

    def build_cast(self, strict: bool, **kwargs) -> exp.Cast:
        """Build a Cast (strict) or TryCast node from the given expression kwargs."""
        exp_class = exp.Cast if strict else exp.TryCast

        if exp_class == exp.TryCast:
            # Some dialects only allow TRY_CAST on string inputs; record that requirement
            kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING

        return self.expression(exp_class(**kwargs))

    def _parse_json_value(self) -> exp.JSONValue:
        """Parse JSON_VALUE(<expr>, <path> [RETURNING <type>] [ON ERROR/EMPTY ...])."""
        this = self._parse_bitwise()
        self._match(TokenType.COMMA)
        path = self._parse_bitwise()

        returning = self._match(TokenType.RETURNING) and self._parse_type()

        return self.expression(
            exp.JSONValue(
                this=this,
                path=self.dialect.to_json_path(path),
                returning=returning,
                on_condition=self._parse_on_condition(),
            )
        )

    def _parse_group_concat(self) -> exp.Expr | None:
        """Parse GROUP_CONCAT arguments, folding multiple exprs into a single Concat.

        Handles DISTINCT over multiple expressions, a trailing ORDER BY that has
        consumed the concatenated expression, and an optional SEPARATOR clause.
        """

        def concat_exprs(node: exp.Expr | None, exprs: list[exp.Expr]) -> exp.Expr:
            if isinstance(node, exp.Distinct) and len(node.expressions) > 1:
                # DISTINCT a, b, ... -> DISTINCT CONCAT(a, b, ...)
                concat_exprs = [
                    self.expression(
                        exp.Concat(
                            expressions=node.expressions,
                            safe=True,
                            coalesce=self.dialect.CONCAT_COALESCE,
                        )
                    )
                ]
                node.set("expressions", concat_exprs)
                return node
            if len(exprs) == 1:
                return exprs[0]
            return self.expression(
                exp.Concat(expressions=args, safe=True, coalesce=self.dialect.CONCAT_COALESCE)
            )

        args = self._parse_csv(self._parse_lambda)

        if args:
            order = args[-1] if isinstance(args[-1], exp.Order) else None

            if order:
                # Order By is the last (or only) expression in the list and has consumed the 'expr' before it,
                # remove 'expr' from exp.Order and add it back to args
                args[-1] = order.this
                order.set("this", concat_exprs(order.this, args))

            this = order or concat_exprs(args[0], args)
        else:
            this = None

        separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None

        return self.expression(exp.GroupConcat(this=this, separator=separator))

    def _parse_initcap(self) -> exp.Initcap:
        """Parse INITCAP(...), filling in the dialect's default delimiter characters."""
        expr = exp.Initcap.from_arg_list(self._parse_function_args())

        # attach dialect's default delimiters
        if expr.args.get("expression") is None:
            expr.set("expression", exp.Literal.string(self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS))

        return expr

    def _parse_operator(self, this: exp.Expr | None) -> exp.Expr | None:
        """Parse PostgreSQL-style OPERATOR(<op>) applications, left-associatively.

        Collects the raw operator text between parentheses and chains successive
        OPERATOR(...) occurrences into nested exp.Operator nodes.
        """
        while True:
            if not self._match(TokenType.L_PAREN):
                break

            # Accumulate the operator's raw token text until the closing paren
            op = ""
            while self._curr and not self._match(TokenType.R_PAREN):
                op += self._curr.text
                self._advance()

            comments = self._prev_comments
            this = self.expression(
                exp.Operator(this=this, operator=op, expression=self._parse_bitwise()),
                comments=comments,
            )

            # Another OPERATOR keyword continues the chain; anything else ends it
            if not self._match(TokenType.OPERATOR):
                break

        return this
def build_var_map(args: BuilderArgs) -> exp.StarMap | exp.VarMap:
    """Build a VarMap from alternating key/value arguments, or a StarMap for a lone star."""
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    map_keys: list[ExpOrStr] = []
    map_values: list[ExpOrStr] = []
    index = 0
    while index < len(args):
        map_keys.append(args[index])
        map_values.append(args[index + 1])
        index += 2

    return exp.VarMap(
        keys=exp.array(*map_keys, copy=False),
        values=exp.array(*map_values, copy=False),
    )
def binary_range_parser(
    expr_type: Type[exp.Expr], reverse_args: bool = False
) -> t.Callable[[Parser, exp.Expr | None], exp.Expr | None]:
    """Return a parser method that builds `expr_type` from `this` and the next bitwise expr.

    When `reverse_args` is set, the parsed right-hand side becomes the node's `this`.
    """

    def _parse_binary_range(self: Parser, this: exp.Expr | None) -> exp.Expr | None:
        rhs = self._parse_bitwise()
        left, right = (rhs, this) if reverse_args else (this, rhs)
        node = self.expression(expr_type(this=left, expression=right))
        # Allow a trailing ESCAPE clause (e.g. for LIKE/ILIKE)
        return self._parse_escape(node)

    return _parse_binary_range
def build_logarithm(args: BuilderArgs, dialect: Dialect) -> exp.Func:
    """Build a LOG/LN node, honoring the dialect's argument order and one-arg default."""
    # Default argument order is base, expression
    base = seq_get(args, 0)
    value = seq_get(args, 1)

    if not value:
        # Single-argument LOG: some dialects define it as the natural logarithm
        if dialect.parser_class.LOG_DEFAULTS_TO_LN:
            return exp.Ln(this=base)
        return exp.Log(this=base)

    if not dialect.LOG_BASE_FIRST:
        base, value = value, base

    return exp.Log(this=base, expression=value)
def build_extract_json_with_path(
    expr_type: Type[E],
) -> t.Callable[[BuilderArgs, Dialect], E]:
    """Produce a builder that wraps the second argument as a dialect-specific JSON path."""

    def _builder(args: BuilderArgs, dialect: Dialect) -> E:
        source = seq_get(args, 0)
        json_path = dialect.to_json_path(seq_get(args, 1))
        node = expr_type(this=source, expression=json_path)

        # JSON_EXTRACT may carry additional path arguments beyond the first two
        if expr_type is exp.JSONExtract and len(args) > 2:
            node.set("expressions", args[2:])
        if expr_type is exp.JSONExtractScalar:
            node.set("scalar_only", dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY)

        return node

    return _builder
def build_mod(args: BuilderArgs) -> exp.Mod:
    """Build a MOD node, parenthesizing binary operands to preserve precedence."""

    def _wrap(operand):
        # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
        if isinstance(operand, exp.Binary):
            return exp.Paren(this=operand)
        return operand

    return exp.Mod(this=_wrap(seq_get(args, 0)), expression=_wrap(seq_get(args, 1)))
def build_array_constructor(
    exp_class: Type[E], args: list[t.Any], bracket_kind: TokenType, dialect: Dialect
) -> exp.Expr:
    """Build an array constructor node, recording bracket notation where it matters."""
    node = exp_class(expressions=args)

    # Dialects with distinct ARRAY(...) vs [...] constructors must remember which was used
    if dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS and exp_class == exp.Array:
        node.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return node
def build_convert_timezone(
    args: BuilderArgs, default_source_tz: str | None = None
) -> exp.ConvertTimezone | exp.Anonymous:
    """Build CONVERT_TIMEZONE, injecting the dialect's default source timezone for 2-arg calls."""
    if len(args) != 2:
        return exp.ConvertTimezone.from_arg_list(args)

    # Two-argument form: target timezone + timestamp, with an implicit source timezone
    source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
    return exp.ConvertTimezone(
        source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
    )
def build_trim(args: BuilderArgs, is_left: bool = True, reverse_args: bool = False) -> exp.Trim:
    """Build a one-sided TRIM (LTRIM/RTRIM), optionally swapping the two arguments."""
    target = seq_get(args, 0)
    chars = seq_get(args, 1)

    if reverse_args and chars:
        target, chars = chars, target

    position = "LEADING" if is_left else "TRAILING"
    return exp.Trim(this=target, expression=chars, position=position)
def build_array_append(args: BuilderArgs, dialect: Dialect) -> exp.ArrayAppend:
    """Build ARRAY_APPEND, tagging it with the dialect's NULL-propagation behavior.

    Some dialects (Databricks, Spark, Snowflake) return NULL when the input array
    is NULL, while others (DuckDB, PostgreSQL) create a new single-element array.

    Args:
        args: Function arguments [array, element]
        dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from

    Returns:
        ArrayAppend expression with the appropriate null_propagation flag
    """
    array, element = seq_get(args, 0), seq_get(args, 1)
    return exp.ArrayAppend(
        this=array,
        expression=element,
        null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS,
    )
Builds ArrayAppend with NULL propagation semantics based on the dialect configuration.
Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL. Others (DuckDB, PostgreSQL) create a new single-element array instead.
Arguments:
- args: Function arguments [array, element]
- dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from
Returns:
ArrayAppend expression with appropriate null_propagation flag
def build_array_prepend(args: BuilderArgs, dialect: Dialect) -> exp.ArrayPrepend:
    """Build ARRAY_PREPEND, tagging it with the dialect's NULL-propagation behavior.

    Some dialects (Databricks, Spark, Snowflake) return NULL when the input array
    is NULL, while others (DuckDB, PostgreSQL) create a new single-element array.

    Args:
        args: Function arguments [array, element]
        dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from

    Returns:
        ArrayPrepend expression with the appropriate null_propagation flag
    """
    array, element = seq_get(args, 0), seq_get(args, 1)
    return exp.ArrayPrepend(
        this=array,
        expression=element,
        null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS,
    )
Builds ArrayPrepend with NULL propagation semantics based on the dialect configuration.
Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL. Others (DuckDB, PostgreSQL) create a new single-element array instead.
Arguments:
- args: Function arguments [array, element]
- dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from
Returns:
ArrayPrepend expression with appropriate null_propagation flag
def build_array_concat(args: BuilderArgs, dialect: Dialect) -> exp.ArrayConcat:
    """Build ARRAY_CONCAT/ARRAY_CAT, tagging it with the dialect's NULL handling.

    Some dialects (Redshift, Snowflake) return NULL when any input array is NULL,
    while others (DuckDB, PostgreSQL) skip NULL arrays and keep concatenating.

    Args:
        args: Function arguments [array1, array2, ...] (variadic)
        dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from

    Returns:
        ArrayConcat expression with the appropriate null_propagation flag
    """
    first, rest = seq_get(args, 0), args[1:]
    return exp.ArrayConcat(
        this=first,
        expressions=rest,
        null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS,
    )
Builds ArrayConcat with NULL propagation semantics based on the dialect configuration.
Some dialects (Redshift, Snowflake) return NULL when any input array is NULL. Others (DuckDB, PostgreSQL) skip NULL arrays and continue concatenation.
Arguments:
- args: Function arguments [array1, array2, ...] (variadic)
- dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from
Returns:
ArrayConcat expression with appropriate null_propagation flag
def build_array_remove(args: BuilderArgs, dialect: Dialect) -> exp.ArrayRemove:
    """Build ARRAY_REMOVE, tagging it with the dialect's NULL-propagation behavior.

    Some dialects (Snowflake) return NULL when the removal value is NULL, while
    others (DuckDB) may return an empty array due to NULL comparison semantics.

    Args:
        args: Function arguments [array, value_to_remove]
        dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from

    Returns:
        ArrayRemove expression with the appropriate null_propagation flag
    """
    array, value = seq_get(args, 0), seq_get(args, 1)
    return exp.ArrayRemove(
        this=array,
        expression=value,
        null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS,
    )
Builds ArrayRemove with NULL propagation semantics based on the dialect configuration.
Some dialects (Snowflake) return NULL when the removal value is NULL. Others (DuckDB) may return an empty array due to NULL comparison semantics.
Arguments:
- args: Function arguments [array, value_to_remove]
- dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from
Returns:
ArrayRemove expression with appropriate null_propagation flag
282class Parser: 283 """ 284 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 285 286 Args: 287 error_level: The desired error level. 288 Default: ErrorLevel.IMMEDIATE 289 error_message_context: The amount of context to capture from a query string when displaying 290 the error message (in number of characters). 291 Default: 100 292 max_errors: Maximum number of error messages to include in a raised ParseError. 293 This is only relevant if error_level is ErrorLevel.RAISE. 294 Default: 3 295 max_nodes: Maximum number of AST nodes to prevent memory exhaustion. 296 Set to -1 (default) to disable the check. 297 """ 298 299 __slots__ = ( 300 "error_level", 301 "error_message_context", 302 "max_errors", 303 "max_nodes", 304 "dialect", 305 "sql", 306 "errors", 307 "_tokens", 308 "_index", 309 "_curr", 310 "_next", 311 "_prev", 312 "_prev_comments", 313 "_pipe_cte_counter", 314 "_chunks", 315 "_chunk_index", 316 "_tokens_size", 317 "_node_count", 318 ) 319 320 FUNCTIONS: t.ClassVar[dict[str, t.Callable]] = { 321 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 322 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 323 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 324 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 325 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 326 ), 327 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 328 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 329 ), 330 "ARRAY_APPEND": build_array_append, 331 "ARRAY_CAT": build_array_concat, 332 "ARRAY_CONCAT": build_array_concat, 333 "ARRAY_INTERSECT": lambda args: exp.ArrayIntersect(expressions=args), 334 "ARRAY_INTERSECTION": lambda args: exp.ArrayIntersect(expressions=args), 335 "ARRAY_PREPEND": build_array_prepend, 336 "ARRAY_REMOVE": build_array_remove, 337 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), 
expressions=args[1:], big_int=True), 338 "CONCAT": lambda args, dialect: exp.Concat( 339 expressions=args, 340 safe=not dialect.STRICT_STRING_CONCAT, 341 coalesce=dialect.CONCAT_COALESCE, 342 ), 343 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 344 expressions=args, 345 safe=not dialect.STRICT_STRING_CONCAT, 346 coalesce=dialect.CONCAT_COALESCE, 347 ), 348 "CONVERT_TIMEZONE": build_convert_timezone, 349 "DATE_TO_DATE_STR": lambda args: exp.Cast( 350 this=seq_get(args, 0), 351 to=exp.DataType(this=exp.DType.TEXT), 352 ), 353 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 354 start=seq_get(args, 0), 355 end=seq_get(args, 1), 356 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 357 ), 358 "GENERATE_UUID": lambda args, dialect: exp.Uuid( 359 is_string=dialect.UUID_IS_STRING_TYPE or None 360 ), 361 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 362 "GREATEST": lambda args, dialect: exp.Greatest( 363 this=seq_get(args, 0), 364 expressions=args[1:], 365 ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS, 366 ), 367 "LEAST": lambda args, dialect: exp.Least( 368 this=seq_get(args, 0), 369 expressions=args[1:], 370 ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS, 371 ), 372 "HEX": build_hex, 373 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 374 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 375 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 376 "JSON_KEYS": lambda args, dialect: exp.JSONKeys( 377 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 378 ), 379 "LIKE": build_like, 380 "LOG": build_logarithm, 381 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 382 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 383 "LOWER": build_lower, 384 "LPAD": lambda args: build_pad(args), 385 "LEFTPAD": lambda args: 
build_pad(args), 386 "LTRIM": lambda args: build_trim(args), 387 "MOD": build_mod, 388 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 389 "RPAD": lambda args: build_pad(args, is_left=False), 390 "RTRIM": lambda args: build_trim(args, is_left=False), 391 "SCOPE_RESOLUTION": lambda args: ( 392 exp.ScopeResolution(expression=seq_get(args, 0)) 393 if len(args) != 2 394 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)) 395 ), 396 "STRPOS": exp.StrPosition.from_arg_list, 397 "CHARINDEX": lambda args: build_locate_strposition(args), 398 "INSTR": exp.StrPosition.from_arg_list, 399 "LOCATE": lambda args: build_locate_strposition(args), 400 "TIME_TO_TIME_STR": lambda args: exp.Cast( 401 this=seq_get(args, 0), 402 to=exp.DataType(this=exp.DType.TEXT), 403 ), 404 "TO_HEX": build_hex, 405 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 406 this=exp.Cast( 407 this=seq_get(args, 0), 408 to=exp.DataType(this=exp.DType.TEXT), 409 ), 410 start=exp.Literal.number(1), 411 length=exp.Literal.number(10), 412 ), 413 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 414 "UPPER": build_upper, 415 "UUID": lambda args, dialect: exp.Uuid(is_string=dialect.UUID_IS_STRING_TYPE or None), 416 "VAR_MAP": build_var_map, 417 } 418 419 NO_PAREN_FUNCTIONS: t.ClassVar[dict] = { 420 TokenType.CURRENT_DATE: exp.CurrentDate, 421 TokenType.CURRENT_DATETIME: exp.CurrentDate, 422 TokenType.CURRENT_TIME: exp.CurrentTime, 423 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 424 TokenType.CURRENT_USER: exp.CurrentUser, 425 TokenType.CURRENT_ROLE: exp.CurrentRole, 426 } 427 428 STRUCT_TYPE_TOKENS: t.ClassVar = { 429 TokenType.NESTED, 430 TokenType.OBJECT, 431 TokenType.STRUCT, 432 TokenType.UNION, 433 } 434 435 NESTED_TYPE_TOKENS: t.ClassVar = { 436 TokenType.ARRAY, 437 TokenType.LIST, 438 TokenType.LOWCARDINALITY, 439 TokenType.MAP, 440 TokenType.NULLABLE, 441 TokenType.RANGE, 442 *STRUCT_TYPE_TOKENS, 443 } 444 445 ENUM_TYPE_TOKENS: 
t.ClassVar = { 446 TokenType.DYNAMIC, 447 TokenType.ENUM, 448 TokenType.ENUM8, 449 TokenType.ENUM16, 450 } 451 452 AGGREGATE_TYPE_TOKENS: t.ClassVar = { 453 TokenType.AGGREGATEFUNCTION, 454 TokenType.SIMPLEAGGREGATEFUNCTION, 455 } 456 457 TYPE_TOKENS: t.ClassVar = { 458 TokenType.BIT, 459 TokenType.BOOLEAN, 460 TokenType.TINYINT, 461 TokenType.UTINYINT, 462 TokenType.SMALLINT, 463 TokenType.USMALLINT, 464 TokenType.INT, 465 TokenType.UINT, 466 TokenType.BIGINT, 467 TokenType.UBIGINT, 468 TokenType.BIGNUM, 469 TokenType.INT128, 470 TokenType.UINT128, 471 TokenType.INT256, 472 TokenType.UINT256, 473 TokenType.MEDIUMINT, 474 TokenType.UMEDIUMINT, 475 TokenType.FIXEDSTRING, 476 TokenType.FLOAT, 477 TokenType.DOUBLE, 478 TokenType.UDOUBLE, 479 TokenType.CHAR, 480 TokenType.NCHAR, 481 TokenType.VARCHAR, 482 TokenType.NVARCHAR, 483 TokenType.BPCHAR, 484 TokenType.TEXT, 485 TokenType.MEDIUMTEXT, 486 TokenType.LONGTEXT, 487 TokenType.BLOB, 488 TokenType.MEDIUMBLOB, 489 TokenType.LONGBLOB, 490 TokenType.BINARY, 491 TokenType.VARBINARY, 492 TokenType.JSON, 493 TokenType.JSONB, 494 TokenType.INTERVAL, 495 TokenType.TINYBLOB, 496 TokenType.TINYTEXT, 497 TokenType.TIME, 498 TokenType.TIMETZ, 499 TokenType.TIME_NS, 500 TokenType.TIMESTAMP, 501 TokenType.TIMESTAMP_S, 502 TokenType.TIMESTAMP_MS, 503 TokenType.TIMESTAMP_NS, 504 TokenType.TIMESTAMPTZ, 505 TokenType.TIMESTAMPLTZ, 506 TokenType.TIMESTAMPNTZ, 507 TokenType.DATETIME, 508 TokenType.DATETIME2, 509 TokenType.DATETIME64, 510 TokenType.SMALLDATETIME, 511 TokenType.DATE, 512 TokenType.DATE32, 513 TokenType.INT4RANGE, 514 TokenType.INT4MULTIRANGE, 515 TokenType.INT8RANGE, 516 TokenType.INT8MULTIRANGE, 517 TokenType.NUMRANGE, 518 TokenType.NUMMULTIRANGE, 519 TokenType.TSRANGE, 520 TokenType.TSMULTIRANGE, 521 TokenType.TSTZRANGE, 522 TokenType.TSTZMULTIRANGE, 523 TokenType.DATERANGE, 524 TokenType.DATEMULTIRANGE, 525 TokenType.DECIMAL, 526 TokenType.DECIMAL32, 527 TokenType.DECIMAL64, 528 TokenType.DECIMAL128, 529 
TokenType.DECIMAL256, 530 TokenType.DECFLOAT, 531 TokenType.UDECIMAL, 532 TokenType.BIGDECIMAL, 533 TokenType.UUID, 534 TokenType.GEOGRAPHY, 535 TokenType.GEOGRAPHYPOINT, 536 TokenType.GEOMETRY, 537 TokenType.POINT, 538 TokenType.RING, 539 TokenType.LINESTRING, 540 TokenType.MULTILINESTRING, 541 TokenType.POLYGON, 542 TokenType.MULTIPOLYGON, 543 TokenType.HLLSKETCH, 544 TokenType.HSTORE, 545 TokenType.PSEUDO_TYPE, 546 TokenType.SUPER, 547 TokenType.SERIAL, 548 TokenType.SMALLSERIAL, 549 TokenType.BIGSERIAL, 550 TokenType.XML, 551 TokenType.YEAR, 552 TokenType.USERDEFINED, 553 TokenType.MONEY, 554 TokenType.SMALLMONEY, 555 TokenType.ROWVERSION, 556 TokenType.IMAGE, 557 TokenType.VARIANT, 558 TokenType.VECTOR, 559 TokenType.VOID, 560 TokenType.OBJECT, 561 TokenType.OBJECT_IDENTIFIER, 562 TokenType.INET, 563 TokenType.IPADDRESS, 564 TokenType.IPPREFIX, 565 TokenType.IPV4, 566 TokenType.IPV6, 567 TokenType.UNKNOWN, 568 TokenType.NOTHING, 569 TokenType.NULL, 570 TokenType.NAME, 571 TokenType.TDIGEST, 572 TokenType.DYNAMIC, 573 *ENUM_TYPE_TOKENS, 574 *NESTED_TYPE_TOKENS, 575 *AGGREGATE_TYPE_TOKENS, 576 } 577 578 SIGNED_TO_UNSIGNED_TYPE_TOKEN: t.ClassVar = { 579 TokenType.BIGINT: TokenType.UBIGINT, 580 TokenType.INT: TokenType.UINT, 581 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 582 TokenType.SMALLINT: TokenType.USMALLINT, 583 TokenType.TINYINT: TokenType.UTINYINT, 584 TokenType.DECIMAL: TokenType.UDECIMAL, 585 TokenType.DOUBLE: TokenType.UDOUBLE, 586 } 587 588 SUBQUERY_PREDICATES: t.ClassVar = { 589 TokenType.ANY: exp.Any, 590 TokenType.ALL: exp.All, 591 TokenType.EXISTS: exp.Exists, 592 TokenType.SOME: exp.Any, 593 } 594 595 SUBQUERY_TOKENS: t.ClassVar = { 596 TokenType.SELECT, 597 TokenType.WITH, 598 TokenType.FROM, 599 } 600 601 RESERVED_TOKENS: t.ClassVar = { 602 *Tokenizer.SINGLE_TOKENS.values(), 603 TokenType.SELECT, 604 } - {TokenType.IDENTIFIER} 605 606 DB_CREATABLES: t.ClassVar = { 607 TokenType.DATABASE, 608 TokenType.DICTIONARY, 609 TokenType.FILE_FORMAT, 610 
TokenType.MODEL, 611 TokenType.NAMESPACE, 612 TokenType.SCHEMA, 613 TokenType.SEMANTIC_VIEW, 614 TokenType.SEQUENCE, 615 TokenType.SINK, 616 TokenType.SOURCE, 617 TokenType.STAGE, 618 TokenType.STORAGE_INTEGRATION, 619 TokenType.STREAMLIT, 620 TokenType.TABLE, 621 TokenType.TAG, 622 TokenType.VIEW, 623 TokenType.WAREHOUSE, 624 } 625 626 CREATABLES: t.ClassVar = { 627 TokenType.COLUMN, 628 TokenType.CONSTRAINT, 629 TokenType.FOREIGN_KEY, 630 TokenType.FUNCTION, 631 TokenType.INDEX, 632 TokenType.PROCEDURE, 633 TokenType.TRIGGER, 634 *DB_CREATABLES, 635 } 636 637 TRIGGER_EVENTS: t.ClassVar = { 638 TokenType.INSERT, 639 TokenType.UPDATE, 640 TokenType.DELETE, 641 TokenType.TRUNCATE, 642 } 643 644 ALTERABLES: t.ClassVar = { 645 TokenType.INDEX, 646 TokenType.TABLE, 647 TokenType.VIEW, 648 TokenType.SESSION, 649 } 650 651 # Tokens that can represent identifiers 652 ID_VAR_TOKENS: t.ClassVar[set] = { 653 TokenType.ALL, 654 TokenType.ANALYZE, 655 TokenType.ATTACH, 656 TokenType.VAR, 657 TokenType.ANTI, 658 TokenType.APPLY, 659 TokenType.ASC, 660 TokenType.ASOF, 661 TokenType.AUTO_INCREMENT, 662 TokenType.BEGIN, 663 TokenType.BPCHAR, 664 TokenType.CACHE, 665 TokenType.CASE, 666 TokenType.COLLATE, 667 TokenType.COMMAND, 668 TokenType.COMMENT, 669 TokenType.COMMIT, 670 TokenType.CONSTRAINT, 671 TokenType.COPY, 672 TokenType.CUBE, 673 TokenType.CURRENT_SCHEMA, 674 TokenType.DEFAULT, 675 TokenType.DELETE, 676 TokenType.DESC, 677 TokenType.DESCRIBE, 678 TokenType.DETACH, 679 TokenType.DICTIONARY, 680 TokenType.DIV, 681 TokenType.END, 682 TokenType.EXECUTE, 683 TokenType.EXPORT, 684 TokenType.ESCAPE, 685 TokenType.FALSE, 686 TokenType.FIRST, 687 TokenType.FILE, 688 TokenType.FILTER, 689 TokenType.FINAL, 690 TokenType.FORMAT, 691 TokenType.FULL, 692 TokenType.GET, 693 TokenType.IDENTIFIER, 694 TokenType.INOUT, 695 TokenType.IS, 696 TokenType.ISNULL, 697 TokenType.INTERVAL, 698 TokenType.KEEP, 699 TokenType.KILL, 700 TokenType.LEFT, 701 TokenType.LIMIT, 702 TokenType.LOAD, 703 
TokenType.LOCK, 704 TokenType.MATCH, 705 TokenType.MERGE, 706 TokenType.NATURAL, 707 TokenType.NEXT, 708 TokenType.OFFSET, 709 TokenType.OPERATOR, 710 TokenType.ORDINALITY, 711 TokenType.OVER, 712 TokenType.OVERLAPS, 713 TokenType.OVERWRITE, 714 TokenType.PARTITION, 715 TokenType.PERCENT, 716 TokenType.PIVOT, 717 TokenType.PRAGMA, 718 TokenType.PUT, 719 TokenType.RANGE, 720 TokenType.RECURSIVE, 721 TokenType.REFERENCES, 722 TokenType.REFRESH, 723 TokenType.RENAME, 724 TokenType.REPLACE, 725 TokenType.RIGHT, 726 TokenType.ROLLUP, 727 TokenType.ROW, 728 TokenType.ROWS, 729 TokenType.SEMI, 730 TokenType.SET, 731 TokenType.SETTINGS, 732 TokenType.SHOW, 733 TokenType.STREAM, 734 TokenType.STREAMLIT, 735 TokenType.TEMPORARY, 736 TokenType.TOP, 737 TokenType.TRUE, 738 TokenType.TRUNCATE, 739 TokenType.UNIQUE, 740 TokenType.UNNEST, 741 TokenType.UNPIVOT, 742 TokenType.UPDATE, 743 TokenType.USE, 744 TokenType.VOLATILE, 745 TokenType.WINDOW, 746 TokenType.CURRENT_CATALOG, 747 TokenType.LOCALTIME, 748 TokenType.LOCALTIMESTAMP, 749 TokenType.SESSION_USER, 750 TokenType.STRAIGHT_JOIN, 751 *ALTERABLES, 752 *CREATABLES, 753 *SUBQUERY_PREDICATES, 754 *TYPE_TOKENS, 755 *NO_PAREN_FUNCTIONS, 756 } - {TokenType.UNION} 757 758 TABLE_ALIAS_TOKENS: t.ClassVar[set] = ID_VAR_TOKENS - { 759 TokenType.ANTI, 760 TokenType.ASOF, 761 TokenType.FULL, 762 TokenType.LEFT, 763 TokenType.LOCK, 764 TokenType.NATURAL, 765 TokenType.RIGHT, 766 TokenType.SEMI, 767 TokenType.WINDOW, 768 } 769 770 ALIAS_TOKENS: t.ClassVar = ID_VAR_TOKENS 771 772 COLON_PLACEHOLDER_TOKENS: t.ClassVar = ID_VAR_TOKENS 773 774 ARRAY_CONSTRUCTORS: t.ClassVar = { 775 "ARRAY": exp.Array, 776 "LIST": exp.List, 777 } 778 779 COMMENT_TABLE_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - {TokenType.IS} 780 781 UPDATE_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - {TokenType.SET} 782 783 TRIM_TYPES: t.ClassVar = {"LEADING", "TRAILING", "BOTH"} 784 785 # Tokens that indicate a simple column reference 786 IDENTIFIER_TOKENS: 
t.ClassVar[frozenset] = frozenset({TokenType.VAR, TokenType.IDENTIFIER}) 787 788 BRACKETS: t.ClassVar[frozenset] = frozenset({TokenType.L_BRACKET, TokenType.L_BRACE}) 789 790 # Postfix tokens that prevent the bare column fast path 791 COLUMN_POSTFIX_TOKENS: t.ClassVar[frozenset] = frozenset( 792 { 793 TokenType.L_PAREN, 794 TokenType.L_BRACKET, 795 TokenType.L_BRACE, 796 TokenType.COLON, 797 TokenType.JOIN_MARKER, 798 } 799 ) 800 801 TABLE_POSTFIX_TOKENS: t.ClassVar[frozenset] = frozenset( 802 { 803 TokenType.L_PAREN, 804 TokenType.L_BRACKET, 805 TokenType.L_BRACE, 806 TokenType.PIVOT, 807 TokenType.UNPIVOT, 808 TokenType.TABLE_SAMPLE, 809 } 810 ) 811 812 FUNC_TOKENS: t.ClassVar = { 813 TokenType.COLLATE, 814 TokenType.COMMAND, 815 TokenType.CURRENT_DATE, 816 TokenType.CURRENT_DATETIME, 817 TokenType.CURRENT_SCHEMA, 818 TokenType.CURRENT_TIMESTAMP, 819 TokenType.CURRENT_TIME, 820 TokenType.CURRENT_USER, 821 TokenType.CURRENT_CATALOG, 822 TokenType.FILTER, 823 TokenType.FIRST, 824 TokenType.FORMAT, 825 TokenType.GET, 826 TokenType.GLOB, 827 TokenType.IDENTIFIER, 828 TokenType.INDEX, 829 TokenType.ISNULL, 830 TokenType.ILIKE, 831 TokenType.INSERT, 832 TokenType.LIKE, 833 TokenType.LOCALTIME, 834 TokenType.LOCALTIMESTAMP, 835 TokenType.MERGE, 836 TokenType.NEXT, 837 TokenType.OFFSET, 838 TokenType.PRIMARY_KEY, 839 TokenType.RANGE, 840 TokenType.REPLACE, 841 TokenType.RLIKE, 842 TokenType.ROW, 843 TokenType.SESSION_USER, 844 TokenType.UNNEST, 845 TokenType.VAR, 846 TokenType.LEFT, 847 TokenType.RIGHT, 848 TokenType.SEQUENCE, 849 TokenType.DATE, 850 TokenType.DATETIME, 851 TokenType.TABLE, 852 TokenType.TIMESTAMP, 853 TokenType.TIMESTAMPTZ, 854 TokenType.TRUNCATE, 855 TokenType.UTC_DATE, 856 TokenType.UTC_TIME, 857 TokenType.UTC_TIMESTAMP, 858 TokenType.WINDOW, 859 TokenType.XOR, 860 *TYPE_TOKENS, 861 *SUBQUERY_PREDICATES, 862 } 863 864 CONJUNCTION: t.ClassVar[dict[TokenType, type[exp.Expr]]] = { 865 TokenType.AND: exp.And, 866 } 867 868 ASSIGNMENT: 
t.ClassVar[dict[TokenType, type[exp.Expr]]] = { 869 TokenType.COLON_EQ: exp.PropertyEQ, 870 } 871 872 DISJUNCTION: t.ClassVar[dict[TokenType, type[exp.Expr]]] = { 873 TokenType.OR: exp.Or, 874 } 875 876 EQUALITY: t.ClassVar = { 877 TokenType.EQ: exp.EQ, 878 TokenType.NEQ: exp.NEQ, 879 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 880 } 881 882 COMPARISON: t.ClassVar = { 883 TokenType.GT: exp.GT, 884 TokenType.GTE: exp.GTE, 885 TokenType.LT: exp.LT, 886 TokenType.LTE: exp.LTE, 887 } 888 889 BITWISE: t.ClassVar = { 890 TokenType.AMP: exp.BitwiseAnd, 891 TokenType.CARET: exp.BitwiseXor, 892 TokenType.PIPE: exp.BitwiseOr, 893 } 894 895 TERM: t.ClassVar = { 896 TokenType.DASH: exp.Sub, 897 TokenType.PLUS: exp.Add, 898 TokenType.MOD: exp.Mod, 899 TokenType.COLLATE: exp.Collate, 900 } 901 902 FACTOR: t.ClassVar = { 903 TokenType.DIV: exp.IntDiv, 904 TokenType.LR_ARROW: exp.Distance, 905 TokenType.SLASH: exp.Div, 906 TokenType.STAR: exp.Mul, 907 } 908 909 EXPONENT: t.ClassVar[dict[TokenType, type[exp.Expr]]] = {} 910 911 TIMES: t.ClassVar = { 912 TokenType.TIME, 913 TokenType.TIMETZ, 914 } 915 916 TIMESTAMPS: t.ClassVar = { 917 TokenType.TIMESTAMP, 918 TokenType.TIMESTAMPNTZ, 919 TokenType.TIMESTAMPTZ, 920 TokenType.TIMESTAMPLTZ, 921 *TIMES, 922 } 923 924 SET_OPERATIONS: t.ClassVar = { 925 TokenType.UNION, 926 TokenType.INTERSECT, 927 TokenType.EXCEPT, 928 } 929 930 JOIN_METHODS: t.ClassVar = { 931 TokenType.ASOF, 932 TokenType.NATURAL, 933 TokenType.POSITIONAL, 934 } 935 936 JOIN_SIDES: t.ClassVar = { 937 TokenType.LEFT, 938 TokenType.RIGHT, 939 TokenType.FULL, 940 } 941 942 JOIN_KINDS: t.ClassVar = { 943 TokenType.ANTI, 944 TokenType.CROSS, 945 TokenType.INNER, 946 TokenType.OUTER, 947 TokenType.SEMI, 948 TokenType.STRAIGHT_JOIN, 949 } 950 951 JOIN_HINTS: t.ClassVar[set[str]] = set() 952 953 # Tokens that unambiguously end a table reference on the fast path 954 TABLE_TERMINATORS: t.ClassVar[frozenset] = frozenset( 955 { 956 TokenType.COMMA, 957 TokenType.GROUP_BY, 958 
TokenType.HAVING, 959 TokenType.JOIN, 960 TokenType.LIMIT, 961 TokenType.ON, 962 TokenType.ORDER_BY, 963 TokenType.R_PAREN, 964 TokenType.SEMICOLON, 965 TokenType.SENTINEL, 966 TokenType.WHERE, 967 *SET_OPERATIONS, 968 *JOIN_KINDS, 969 *JOIN_METHODS, 970 *JOIN_SIDES, 971 } 972 ) 973 974 LAMBDAS: t.ClassVar = { 975 TokenType.ARROW: lambda self, expressions: self.expression( 976 exp.Lambda( 977 this=self._replace_lambda( 978 self._parse_disjunction(), 979 expressions, 980 ), 981 expressions=expressions, 982 ) 983 ), 984 TokenType.FARROW: lambda self, expressions: self.expression( 985 exp.Kwarg(this=exp.var(expressions[0].name), expression=self._parse_disjunction()) 986 ), 987 } 988 989 # Whether lambda args include type annotations, e.g. TRANSFORM(arr, x INT -> x + 1) in Snowflake 990 TYPED_LAMBDA_ARGS: t.ClassVar[bool] = False 991 992 LAMBDA_ARG_TERMINATORS: t.ClassVar[frozenset] = frozenset({TokenType.COMMA, TokenType.R_PAREN}) 993 994 COLUMN_OPERATORS: t.ClassVar = { 995 TokenType.DOT: None, 996 TokenType.DOTCOLON: lambda self, this, to: self.expression(exp.JSONCast(this=this, to=to)), 997 TokenType.DCOLON: lambda self, this, to: self.build_cast( 998 strict=self.STRICT_CAST, this=this, to=to 999 ), 1000 TokenType.ARROW: lambda self, this, path: self.expression( 1001 exp.JSONExtract( 1002 this=this, 1003 expression=self.dialect.to_json_path(path), 1004 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 1005 ) 1006 ), 1007 TokenType.DARROW: lambda self, this, path: self.expression( 1008 exp.JSONExtractScalar( 1009 this=this, 1010 expression=self.dialect.to_json_path(path), 1011 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 1012 scalar_only=self.dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY, 1013 ) 1014 ), 1015 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 1016 exp.JSONBExtract(this=this, expression=path) 1017 ), 1018 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 1019 exp.JSONBExtractScalar(this=this, expression=path) 1020 ), 
1021 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 1022 exp.JSONBContains(this=this, expression=key) 1023 ), 1024 } 1025 1026 CAST_COLUMN_OPERATORS: t.ClassVar = { 1027 TokenType.DOTCOLON, 1028 TokenType.DCOLON, 1029 } 1030 1031 EXPRESSION_PARSERS: t.ClassVar = { 1032 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1033 exp.Column: lambda self: self._parse_column(), 1034 exp.ColumnDef: lambda self: self._parse_column_def(self._parse_column()), 1035 exp.Condition: lambda self: self._parse_disjunction(), 1036 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 1037 exp.Expr: lambda self: self._parse_expression(), 1038 exp.From: lambda self: self._parse_from(joins=True), 1039 exp.GrantPrincipal: lambda self: self._parse_grant_principal(), 1040 exp.GrantPrivilege: lambda self: self._parse_grant_privilege(), 1041 exp.Group: lambda self: self._parse_group(), 1042 exp.Having: lambda self: self._parse_having(), 1043 exp.Hint: lambda self: self._parse_hint_body(), 1044 exp.Identifier: lambda self: self._parse_id_var(), 1045 exp.Join: lambda self: self._parse_join(), 1046 exp.Lambda: lambda self: self._parse_lambda(), 1047 exp.Lateral: lambda self: self._parse_lateral(), 1048 exp.Limit: lambda self: self._parse_limit(), 1049 exp.Offset: lambda self: self._parse_offset(), 1050 exp.Order: lambda self: self._parse_order(), 1051 exp.Ordered: lambda self: self._parse_ordered(), 1052 exp.Properties: lambda self: self._parse_properties(), 1053 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 1054 exp.Qualify: lambda self: self._parse_qualify(), 1055 exp.Returning: lambda self: self._parse_returning(), 1056 exp.Select: lambda self: self._parse_select(), 1057 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 1058 exp.Table: lambda self: self._parse_table_parts(), 1059 exp.TableAlias: lambda self: self._parse_table_alias(), 1060 exp.Tuple: lambda self: 
self._parse_value(values=False), 1061 exp.Whens: lambda self: self._parse_when_matched(), 1062 exp.Where: lambda self: self._parse_where(), 1063 exp.Window: lambda self: self._parse_named_window(), 1064 exp.With: lambda self: self._parse_with(), 1065 } 1066 1067 STATEMENT_PARSERS: t.ClassVar = { 1068 TokenType.ALTER: lambda self: self._parse_alter(), 1069 TokenType.ANALYZE: lambda self: self._parse_analyze(), 1070 TokenType.BEGIN: lambda self: self._parse_transaction(), 1071 TokenType.CACHE: lambda self: self._parse_cache(), 1072 TokenType.COMMENT: lambda self: self._parse_comment(), 1073 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 1074 TokenType.COPY: lambda self: self._parse_copy(), 1075 TokenType.CREATE: lambda self: self._parse_create(), 1076 TokenType.DELETE: lambda self: self._parse_delete(), 1077 TokenType.DESC: lambda self: self._parse_describe(), 1078 TokenType.DESCRIBE: lambda self: self._parse_describe(), 1079 TokenType.DROP: lambda self: self._parse_drop(), 1080 TokenType.GRANT: lambda self: self._parse_grant(), 1081 TokenType.REVOKE: lambda self: self._parse_revoke(), 1082 TokenType.INSERT: lambda self: self._parse_insert(), 1083 TokenType.KILL: lambda self: self._parse_kill(), 1084 TokenType.LOAD: lambda self: self._parse_load(), 1085 TokenType.MERGE: lambda self: self._parse_merge(), 1086 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 1087 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma(this=self._parse_expression())), 1088 TokenType.REFRESH: lambda self: self._parse_refresh(), 1089 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 1090 TokenType.SET: lambda self: self._parse_set(), 1091 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 1092 TokenType.UNCACHE: lambda self: self._parse_uncache(), 1093 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 1094 TokenType.UPDATE: lambda self: self._parse_update(), 1095 TokenType.USE: lambda self: 
        self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    # Prefix (unary) operator parsers.
    UNARY_PARSERS: t.ClassVar = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not(this=self._parse_equality())),
        TokenType.TILDE: lambda self: self.expression(exp.BitwiseNot(this=self._parse_unary())),
        TokenType.DASH: lambda self: self.expression(exp.Neg(this=self._parse_unary())),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt(this=self._parse_unary())),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt(this=self._parse_unary())),
    }

    # String-literal token parsers; each receives the just-consumed token.
    STRING_PARSERS: t.ClassVar = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString(this=token.text), token
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National(this=token.text), token
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(
            exp.RawString(this=token.text), token
        ),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal(this=token.text, is_string=True), token
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString(
                this=token.text, escape=self._match_text_seq("UESCAPE") and self._parse_string()
            ),
            token,
        ),
    }

    # Numeric-literal token parsers (bit/byte/hex strings and plain numbers).
    NUMERIC_PARSERS: t.ClassVar = {
        TokenType.BIT_STRING: lambda self, token: self.expression(
            exp.BitString(this=token.text), token
        ),
        TokenType.BYTE_STRING: lambda self, token: self.expression(
            exp.ByteString(
                this=token.text, is_bytes=self.dialect.BYTE_STRING_IS_BYTES_TYPE or None
            ),
            token,
        ),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString(
                this=token.text, is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None
            ),
            token,
        ),
        TokenType.NUMBER: lambda self, token:
        self.expression(
            exp.Literal(this=token.text, is_string=False), token
        ),
    }

    # Primary-expression parsers: all literal parsers plus literal-like tokens.
    PRIMARY_PARSERS: t.ClassVar = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null()),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean(this=True)),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean(this=False)),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    # Placeholder / bind-parameter parsers.
    PLACEHOLDER_PARSERS: t.ClassVar = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder()),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder(this=self._prev.text))
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    # Binary range/predicate operator parsers; each receives the parsed LHS.
    RANGE_PARSERS: t.ClassVar = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
        TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys),
        TokenType.QMARK_PIPE:
        binary_range_parser(exp.JSONBContainsAnyTopKeys),
        TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath),
        TokenType.ADJACENT: binary_range_parser(exp.Adjacent),
        TokenType.OPERATOR: lambda self, this: self._parse_operator(this),
        TokenType.AMP_LT: binary_range_parser(exp.ExtendsLeft),
        TokenType.AMP_GT: binary_range_parser(exp.ExtendsRight),
    }

    # Pipe-syntax transform parsers, keyed by the keyword that follows `|>`;
    # each receives the query built so far and returns the transformed query.
    PIPE_SYNTAX_TRANSFORM_PARSERS: t.ClassVar = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "DISTINCT": lambda self, query: self._advance() or query.distinct(copy=False),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }

    # DDL property parsers, keyed by (possibly multi-word) property keyword.
    PROPERTY_PARSERS: t.ClassVar[dict[str, t.Callable]] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty(expressions=self._parse_csv(self._parse_primary))
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty(this=self._parse_var(any_token=True))
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty(this=exp.Literal.string("IMMUTABLE"))
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty()),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty()),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "ENVIRONMENT": lambda self: self.expression(
            # NOTE(review): `EnviromentProperty` is spelled without the second "n";
            # this must stay in sync with the class name in sqlglot.expressions.
            exp.EnviromentProperty(expressions=self._parse_wrapped_csv(self._parse_assignment))
        ),
        "HANDLER": lambda self: self._parse_property_assignment(exp.HandlerProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty()),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self:
        self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty()),
        "HEAP": lambda self: self.expression(exp.HeapProperty()),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty()),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty(this=exp.Literal.string("IMMUTABLE"))
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty(expressions=self._parse_wrapped_csv(self._parse_table))
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty(this=self._parse_schema())),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty()),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty(multi=True)),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty(this=self._parse_schema())),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self:
        self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty()),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty()),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty(this=self._match_text_seq("BY") and self._parse_bitwise())
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty()),
        "SECURITY": lambda self: self._parse_sql_security(),
        "SQL SECURITY": lambda self: self._parse_sql_security(),
        "SET": lambda self: self.expression(exp.SetProperty(multi=False)),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty(this=exp.Literal.string("STABLE"))
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty()),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty()),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty()),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty(expressions=self._parse_wrapped_csv(self._parse_expression))
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty()),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint parsers, keyed by constraint keyword.
    CONSTRAINT_PARSERS: t.ClassVar = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint(not_=False)),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint(this=self._parse_var_or_string())
        ),
        "CHECK": lambda self: self._parse_check_constraint(),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint(this=self._parse_identifier() or self._parse_column())
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint(this=self._parse_string())
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint(this=self._parse_wrapped_csv(self._parse_ordered))
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint(this=self._parse_wrapped_csv(self._parse_ordered))
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint(this=self._parse_bitwise())
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint(this=self._parse_var())),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint(this=self._parse_bitwise())
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint(this=self._parse_index_params())
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint(this=self._parse_var_or_string())
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint(allow_null=True)),
        "ON": lambda self: (
            (
                self._match(TokenType.UPDATE)
                and self.expression(exp.OnUpdateColumnConstraint(this=self._parse_function()))
            )
            or self.expression(exp.OnProperty(this=self._parse_id_var()))
        ),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint(this=self._parse_string())),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint(this=self._parse_var_or_string())
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL(expressions=[self._parse_bitwise()])),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint()),
        "WITH": lambda self: self.expression(
            exp.Properties(expressions=self._parse_wrapped_properties())
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

    def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expr | None:
        """Parse a BUCKET(...) / TRUNCATE(...) partition transform.

        The keyword itself was consumed by the caller (`self._prev`). Returns
        None — after retreating one token — when the keyword is not followed
        by "(", so it can be re-parsed as a plain identifier instead.
        """
        if not self._match(TokenType.L_PAREN, advance=False):
            # Partitioning by bucket or truncate follows the syntax:
            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
            # If we don't have parenthesis after each keyword, we should instead parse this as an identifier
            self._retreat(self._index - 1)
            return None

        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass(this=this, expression=expression))

    # ALTER TABLE action parsers, keyed by action keyword.
    ALTER_PARSERS: t.ClassVar = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete(where=self._parse_where())),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable(this=self._match(TokenType.WITH) and self._parse_table(schema=True))
        ),
    }

    # ALTER ... ALTER sub-action parsers.
    ALTER_ALTER_PARSERS: t.ClassVar = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    # Constraint keywords that may appear in a schema without an explicit name.
    SCHEMA_UNNAMED_CONSTRAINTS: t.ClassVar = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "BUCKET",
        "TRUNCATE",
    }

    # Function-like keywords parsed without parentheses around their argument.
    NO_PAREN_FUNCTION_PARSERS: t.ClassVar = {
        "ANY": lambda self: self.expression(exp.Any(this=self._parse_bitwise())),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot(this=self._parse_column())
        ),
        "IF": lambda self: self._parse_if(),
    }

    # Token types that cannot serve as a function name.
    INVALID_FUNC_NAME_TOKENS: t.ClassVar = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    # Functions whose arguments may carry aliases (e.g. STRUCT(x AS a)).
    FUNCTIONS_WITH_ALIASED_ARGS: t.ClassVar = {"STRUCT"}

    # Expression types treated as key/value definitions.
    KEY_VALUE_DEFINITIONS: t.ClassVar = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    # Functions that need bespoke argument-list parsing, keyed by SQL name.
    FUNCTION_PARSERS: t.ClassVar[dict[str, t.Callable]] = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "CHAR": lambda self: self._parse_char(),
        "CHR": lambda self: self._parse_char(),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "INITCAP": lambda self: self._parse_initcap(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda
        self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self._parse_xml_element(),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    # Query-modifier parsers: token -> (modifier key, parsed expression) pair.
    QUERY_MODIFIER_PARSERS: t.ClassVar = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample",
        self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
    QUERY_MODIFIER_TOKENS: t.ClassVar = set(QUERY_MODIFIER_PARSERS)

    # SET statement item parsers, keyed by scope keyword.
    SET_PARSERS: t.ClassVar = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # SHOW statement parsers; empty here, populated by dialect subclasses.
    SHOW_PARSERS: t.ClassVar[dict[str, t.Callable]] = {}

    # Parsers applied to literals that are cast to specific types.
    TYPE_LITERAL_PARSERS: t.ClassVar = {
        exp.DType.JSON: lambda self, this, _: self.expression(exp.ParseJSON(this=this)),
    }

    # Dialect-specific converters applied to parsed data types; empty by default.
    TYPE_CONVERTERS: t.ClassVar[dict[exp.DType, t.Callable[[exp.DataType], exp.DataType]]] = {}

    # Tokens that can start the SELECT part of a DDL statement.
    DDL_SELECT_TOKENS: t.ClassVar = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS: t.ClassVar = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND: t.ClassVar = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: t.ClassVar[OPTIONS_TYPE] = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            # NOTE(review): "UNCOMITTED" looks misspelled ("UNCOMMITTED") — verify
            # against the callers/dialects relying on this option before changing.
            ("LEVEL", "READ", "UNCOMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    # ON CONFLICT / OR <action> alternatives.
    CONFLICT_ACTIONS: t.ClassVar[OPTIONS_TYPE] = {
        **dict.fromkeys(("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()),
        "DO": ("NOTHING",
        "UPDATE"),
    }

    # Trigger timing options (BEFORE / AFTER / INSTEAD OF).
    TRIGGER_TIMING: t.ClassVar[OPTIONS_TYPE] = {
        "INSTEAD": (("OF",),),
        "BEFORE": tuple(),
        "AFTER": tuple(),
    }

    TRIGGER_DEFERRABLE: t.ClassVar[OPTIONS_TYPE] = {
        "NOT": (("DEFERRABLE",),),
        "DEFERRABLE": tuple(),
    }

    # CREATE SEQUENCE option keywords and their allowed continuations.
    CREATE_SEQUENCE: t.ClassVar[OPTIONS_TYPE] = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {"FOR": ("ALL", "INSERT", "NONE")}

    # Object kinds usable with the USE statement.
    USABLES: t.ClassVar[OPTIONS_TYPE] = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: t.ClassVar[OPTIONS_TYPE] = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {}

    EXECUTE_AS_OPTIONS: t.ClassVar[OPTIONS_TYPE] = dict.fromkeys(
        ("CALLER", "SELF", "OWNER"), tuple()
    )

    # Options that may follow a key constraint.
    KEY_CONSTRAINT_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    # Window-frame EXCLUDE options.
    WINDOW_EXCLUDE_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES: t.ClassVar = {"ABORT", "FAIL", "IGNORE",
        "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS: t.ClassVar = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX: t.ClassVar = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND: t.ClassVar = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS: t.ClassVar = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS: t.ClassVar = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS: t.ClassVar = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES: t.ClassVar = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    # Tokens allowed as window aliases / in FETCH clauses are the identifier
    # tokens minus the keywords that would be ambiguous in that position.
    WINDOW_ALIAS_TOKENS: t.ClassVar = ID_VAR_TOKENS - {TokenType.RANGE, TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS: t.ClassVar = {TokenType.OVER}
    WINDOW_SIDES: t.ClassVar = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS: t.ClassVar = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS: t.ClassVar = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS: t.ClassVar = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS: t.ClassVar = {TokenType.DISTINCT}

    UNNEST_OFFSET_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS: t.ClassVar = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS: t.ClassVar = {
        "FILE_FORMAT",
        "COPY_OPTIONS",
        "FORMAT_OPTIONS",
        "CREDENTIAL",
    }

    IS_JSON_PREDICATE_KIND: t.ClassVar = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    # Empty here; populated by dialect subclasses.
    ODBC_DATETIME_LITERALS: t.ClassVar[dict[str, type[exp.Expr]]] = {}

    ON_CONDITION_TOKENS: t.ClassVar = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS: t.ClassVar = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES: t.ClassVar = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    SET_ASSIGNMENT_DELIMITERS: t.ClassVar = {"=", ":=", "TO"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES: t.ClassVar = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    # ANALYZE sub-statement parsers, keyed by keyword.
    ANALYZE_EXPRESSION_PARSERS: t.ClassVar = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS: t.ClassVar = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS: t.ClassVar = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.ClassVar[set[str]] = set()

    RECURSIVE_CTE_SEARCH_KIND: t.ClassVar = {"BREADTH", "DEPTH", "CYCLE"}

    SECURITY_PROPERTY_KEYWORDS: t.ClassVar = {"DEFINER", "INVOKER", "NONE"}

    MODIFIABLES: t.ClassVar = (exp.Query, exp.Table, exp.TableFromRows, exp.Values)

    # --- Dialect-tunable behavior flags; subclasses override as needed. ---

    STRICT_CAST: t.ClassVar = True

    PREFIXED_PIVOT_COLUMNS: t.ClassVar = False
    IDENTIFY_PIVOT_STRINGS: t.ClassVar = False

    LOG_DEFAULTS_TO_LN: t.ClassVar = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV: t.ClassVar = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.ClassVar[str | None] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER: t.ClassVar = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST: t.ClassVar = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES: t.ClassVar = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP: t.ClassVar = True
    SET_OP_MODIFIERS: t.ClassVar = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS: t.ClassVar = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE: t.ClassVar = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT: t.ClassVar = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN: t.ClassVar = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST: t.ClassVar = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS: t.ClassVar = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION: t.ClassVar = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT: t.ClassVar = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE: t.ClassVar = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN: t.ClassVar = True

    # Whether Alter statements are allowed to contain Partition specifications
    ALTER_TABLE_PARTITIONS: t.ClassVar = False

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE: t.ClassVar = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR: t.ClassVar = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
1811 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: t.ClassVar = False 1812 1813 # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g this 1814 # is true for Snowflake but not for BigQuery which can also process strings 1815 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION: t.ClassVar = False 1816 1817 # Dialects like Databricks support JOINS without join criteria 1818 # Adding an ON TRUE, makes transpilation semantically correct for other dialects 1819 ADD_JOIN_ON_TRUE: t.ClassVar = False 1820 1821 # Whether INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]' 1822 # can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND` 1823 SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT: t.ClassVar = False 1824 1825 SHOW_TRIE: t.ClassVar[dict] = new_trie(key.split(" ") for key in SHOW_PARSERS) 1826 SET_TRIE: t.ClassVar[dict] = new_trie(key.split(" ") for key in SET_PARSERS) 1827 1828 def __init__( 1829 self, 1830 error_level: ErrorLevel | None = None, 1831 error_message_context: int = 100, 1832 max_errors: int = 3, 1833 max_nodes: int = -1, 1834 dialect: DialectType = None, 1835 ): 1836 self.error_level: ErrorLevel = error_level or ErrorLevel.IMMEDIATE 1837 self.error_message_context: int = error_message_context 1838 self.max_errors: int = max_errors 1839 self.max_nodes: int = max_nodes 1840 self.dialect: t.Any = _resolve_dialect(dialect) 1841 self.sql: str = "" 1842 self.errors: list[ParseError] = [] 1843 self._tokens: list[Token] = [] 1844 self._tokens_size: i64 = 0 1845 self._index: i64 = 0 1846 self._curr: Token = SENTINEL_NONE 1847 self._next: Token = SENTINEL_NONE 1848 self._prev: Token = SENTINEL_NONE 1849 self._prev_comments: list[str] = [] 1850 self._pipe_cte_counter: int = 0 1851 self._chunks: list[list[Token]] = [] 1852 self._chunk_index: i64 = 0 1853 self._node_count: int = 0 1854 1855 def reset(self) -> None: 1856 self.sql = "" 1857 self.errors = [] 1858 self._tokens = [] 1859 self._tokens_size = 0 1860 self._index = 0 1861 self._curr = SENTINEL_NONE 1862 self._next 
= SENTINEL_NONE 1863 self._prev = SENTINEL_NONE 1864 self._prev_comments = [] 1865 self._pipe_cte_counter = 0 1866 self._chunks = [] 1867 self._chunk_index = 0 1868 self._node_count = 0 1869 1870 def _advance(self, times: i64 = 1) -> None: 1871 index = self._index + times 1872 self._index = index 1873 tokens = self._tokens 1874 size = self._tokens_size 1875 self._curr = tokens[index] if index < size else SENTINEL_NONE 1876 self._next = tokens[index + 1] if index + 1 < size else SENTINEL_NONE 1877 1878 if index > 0: 1879 prev = tokens[index - 1] 1880 self._prev = prev 1881 self._prev_comments = prev.comments 1882 else: 1883 self._prev = SENTINEL_NONE 1884 self._prev_comments = [] 1885 1886 def _advance_chunk(self) -> None: 1887 self._index = -1 1888 self._tokens = self._chunks[self._chunk_index] 1889 self._tokens_size = i64(len(self._tokens)) 1890 self._chunk_index += 1 1891 self._advance() 1892 1893 def _retreat(self, index: i64) -> None: 1894 if index != self._index: 1895 self._advance(index - self._index) 1896 1897 def _add_comments(self, expression: exp.Expr | None) -> None: 1898 if expression and self._prev_comments: 1899 expression.add_comments(self._prev_comments) 1900 self._prev_comments = [] 1901 1902 def _match( 1903 self, token_type: TokenType, advance: bool = True, expression: exp.Expr | None = None 1904 ) -> bool: 1905 if self._curr.token_type == token_type: 1906 if advance: 1907 self._advance() 1908 self._add_comments(expression) 1909 return True 1910 return False 1911 1912 def _match_set(self, types: t.Collection[TokenType], advance: bool = True) -> bool: 1913 if self._curr.token_type in types: 1914 if advance: 1915 self._advance() 1916 return True 1917 return False 1918 1919 def _match_pair( 1920 self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True 1921 ) -> bool: 1922 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 1923 if advance: 1924 self._advance(2) 1925 return True 1926 return False 
    def _match_texts(self, texts: t.Collection[str], advance: bool = True) -> bool:
        """Return True if the current (non-string) token's uppercased text is in `texts`."""
        if self._curr.token_type != TokenType.STRING and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts: str, advance: bool = True) -> bool:
        """Return True if the upcoming tokens spell out `texts` in order.

        On a partial mismatch the cursor is restored to where it started; with
        advance=False the cursor is restored even on a full match (peek mode).
        """
        index = self._index
        string_type = TokenType.STRING
        for text in texts:
            if self._curr.token_type != string_type and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    def _is_connected(self) -> bool:
        """Return True if the previous and current tokens are adjacent in the SQL text."""
        prev = self._prev
        curr = self._curr
        return bool(prev and curr and prev.end + 1 == curr.start)

    def _find_sql(self, start: Token, end: Token) -> str:
        """Return the slice of the original SQL spanned by `start` through `end`."""
        return self.sql[start.start : end.end + 1]

    def raise_error(self, message: str, token: Token = SENTINEL_NONE) -> None:
        """Append a ParseError for `token` (or the nearest cursor token), raising it
        immediately when the error level is IMMEDIATE."""
        token = token or self._curr or self._prev or Token.string("")
        formatted_sql, start_context, highlight, end_context = highlight_sql(
            sql=self.sql,
            positions=[(token.start, token.end)],
            context_length=self.error_message_context,
        )
        formatted_message = f"{message}. Line {token.line}, Col: {token.col}.\n  {formatted_sql}"

        error = ParseError.new(
            formatted_message,
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def validate_expression(self, expression: E, args: list | None = None) -> E:
        """Validate `expression`'s args, reporting errors unless the level is IGNORE.

        Also enforces the max_nodes budget when one is configured.
        """
        if self.max_nodes > -1:
            self._node_count += 1
            if self._node_count > self.max_nodes:
                self.raise_error(f"Maximum number of AST nodes ({self.max_nodes}) exceeded")
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)
        return expression

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> T | None:
        """Attempt `parse_method`, returning None (and restoring the cursor) on failure.

        Temporarily forces IMMEDIATE error level so failures surface as ParseError.
        """
        index = self._index
        error_level = self.error_level
        this: T | None = None

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def parse(self, raw_tokens: list[Token], sql: str) -> list[exp.Expr | None]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: list[Token],
        sql: str | None = None,
    ) -> list[exp.Expr | None]:
        """
        Parses a list of tokens into a given Expr type. If a collection of Expr
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expr.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(t.cast(type[exp.Expr], expression_type))
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Record which target type this attempt was for, so the merged error is useful
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def expression(
        self,
        instance: E,
        token: Token | None = None,
        comments: list[str] | None = None,
    ) -> E:
        """Finalize a freshly built expression: set positions, attach comments and validate."""
        if token:
            instance.update_positions(token)
        # Explicit comments win; otherwise drain the parser's pending comment buffer
        instance.add_comments(comments) if comments else self._add_comments(instance)
        if not instance.is_primitive:
            instance = self.validate_expression(instance)
        return instance

    # Parses each token chunk as one statement; body continues below
    def _parse_batch_statements(
        self,
        parse_method: t.Callable[[Parser], exp.Expr | None],
        sep_first_statement: bool = True,
    ) -> list[exp.Expr | None]:
2090 expressions = [] 2091 2092 # Chunkification binds if/while statements with the first statement of the body 2093 if sep_first_statement: 2094 self._match(TokenType.BEGIN) 2095 expressions.append(parse_method(self)) 2096 2097 chunks_length = len(self._chunks) 2098 while self._chunk_index < chunks_length: 2099 self._advance_chunk() 2100 2101 if self._match(TokenType.ELSE, advance=False): 2102 return expressions 2103 2104 if expressions and not self._next and self._match(TokenType.END): 2105 expressions.append(exp.EndStatement()) 2106 continue 2107 2108 expressions.append(parse_method(self)) 2109 2110 if self._index < self._tokens_size: 2111 self.raise_error("Invalid expression / Unexpected token") 2112 2113 self.check_errors() 2114 2115 return expressions 2116 2117 def _parse( 2118 self, 2119 parse_method: t.Callable[[Parser], exp.Expr | None], 2120 raw_tokens: list[Token], 2121 sql: str | None = None, 2122 ) -> list[exp.Expr | None]: 2123 self.reset() 2124 self.sql = sql or "" 2125 2126 total = len(raw_tokens) 2127 chunks: list[list[Token]] = [[]] 2128 2129 for i, token in enumerate(raw_tokens): 2130 if token.token_type == TokenType.SEMICOLON: 2131 if token.comments: 2132 chunks.append([token]) 2133 2134 if i < total - 1: 2135 chunks.append([]) 2136 else: 2137 chunks[-1].append(token) 2138 2139 self._chunks = chunks 2140 2141 return self._parse_batch_statements(parse_method=parse_method, sep_first_statement=False) 2142 2143 def _warn_unsupported(self) -> None: 2144 if self._tokens_size <= 1: 2145 return 2146 2147 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 2148 # interested in emitting a warning for the one being currently processed. 2149 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 2150 2151 logger.warning( 2152 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 
2153 ) 2154 2155 def _parse_command(self) -> exp.Command: 2156 self._warn_unsupported() 2157 comments = self._prev_comments 2158 return self.expression( 2159 exp.Command(this=self._prev.text.upper(), expression=self._parse_string()), 2160 comments=comments, 2161 ) 2162 2163 def _parse_comment(self, allow_exists: bool = True) -> exp.Expr: 2164 start = self._prev 2165 exists = self._parse_exists() if allow_exists else None 2166 2167 self._match(TokenType.ON) 2168 2169 materialized = self._match_text_seq("MATERIALIZED") 2170 kind = self._match_set(self.CREATABLES) and self._prev 2171 if not kind: 2172 return self._parse_as_command(start) 2173 2174 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2175 this = self._parse_user_defined_function(kind=kind.token_type) 2176 elif kind.token_type == TokenType.TABLE: 2177 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 2178 elif kind.token_type == TokenType.COLUMN: 2179 this = self._parse_column() 2180 else: 2181 this = self._parse_id_var() 2182 2183 self._match(TokenType.IS) 2184 2185 return self.expression( 2186 exp.Comment( 2187 this=this, 2188 kind=kind.text, 2189 expression=self._parse_string(), 2190 exists=exists, 2191 materialized=materialized, 2192 ) 2193 ) 2194 2195 def _parse_to_table( 2196 self, 2197 ) -> exp.ToTableProperty: 2198 table = self._parse_table_parts(schema=True) 2199 return self.expression(exp.ToTableProperty(this=table)) 2200 2201 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 2202 def _parse_ttl(self) -> exp.Expr: 2203 def _parse_ttl_action() -> exp.Expr | None: 2204 this = self._parse_bitwise() 2205 2206 if self._match_text_seq("DELETE"): 2207 return self.expression(exp.MergeTreeTTLAction(this=this, delete=True)) 2208 if self._match_text_seq("RECOMPRESS"): 2209 return self.expression( 2210 exp.MergeTreeTTLAction(this=this, recompress=self._parse_bitwise()) 2211 ) 2212 if self._match_text_seq("TO", "DISK"): 
                return self.expression(
                    exp.MergeTreeTTLAction(this=this, to_disk=self._parse_string())
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction(this=this, to_volume=self._parse_string())
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL(
                expressions=expressions, where=where, group=group, aggregates=aggregates
            )
        )

    def _parse_condition(self) -> exp.Expr | None:
        """Parse an (optionally parenthesized) boolean condition."""
        return self._parse_wrapped(parse_method=self._parse_expression, optional=True)

    def _parse_block(self) -> exp.Block:
        """Parse a statement block by consuming subsequent chunks as statements."""
        return self.expression(
            exp.Block(
                expressions=self._parse_batch_statements(
                    parse_method=lambda self: self._parse_statement()
                )
            )
        )

    def _parse_whileblock(self) -> exp.WhileBlock:
        """Parse a WHILE <condition> <block> construct."""
        return self.expression(
            exp.WhileBlock(this=self._parse_condition(), body=self._parse_block())
        )

    def _parse_statement(self) -> exp.Expr | None:
        """Parse a single SQL statement: dispatch to a registered statement parser,
        a dialect command, a WHILE block, or fall back to an expression/SELECT."""
        if not self._curr:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
            return self._parse_command()

        if self._match_text_seq("WHILE"):
            return self._parse_whileblock()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()

        # A subquery followed by |> starts a pipe-syntax query
        if isinstance(expression, exp.Subquery) and self._match(TokenType.PIPE_GT, advance=False):
            expression = self._parse_pipe_syntax_query(expression)

        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parse a DROP statement; unrecognized forms fall back to exp.Command."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")
        iceberg = self._match_text_seq("ICEBERG")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind or (iceberg and kind and kind != "TABLE"):
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(schema=True, is_db_reference=kind == "SCHEMA")

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        cascade_or_restrict = self._match_texts(("CASCADE", "RESTRICT")) and self._prev.text.upper()

        return self.expression(
            exp.Drop(
                exists=if_exists,
                this=this,
                expressions=expressions,
                kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
                temporary=temporary,
                materialized=materialized,
                cascade=cascade_or_restrict == "CASCADE",
                restrict=cascade_or_restrict == "RESTRICT",
                constraints=self._match_text_seq("CONSTRAINTS"),
                purge=self._match_text_seq("PURGE"),
                cluster=cluster,
                concurrently=concurrently,
                sync=self._match_text_seq("SYNC"),
                iceberg=iceberg,
            )
        )

    def _parse_exists(self, not_: bool = False) -> bool | None:
        """Match IF [NOT] EXISTS, returning a truthy value only on a full match."""
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse a CREATE statement for functions, indexes, triggers and DB creatables;
        unsupported shapes fall back to exp.Command."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        create_token_type = t.cast(Token, create_token).token_type

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: exp.Expr | None = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: exp.Properties | None) -> None:
            # Merge newly parsed properties into the accumulated Properties node
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._parse_heredoc() if self._match(TokenType.ALIAS) else None
            extend_props(self._parse_function_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = (
                            self._parse_user_defined_function_expression()
                            if create_token_type == TokenType.FUNCTION
                            else self._parse_block()
                        )

                    if return_:
                        expression = self.expression(exp.Return(this=expression))
        elif create_token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif (
            create_token_type == TokenType.CONSTRAINT and self._match(TokenType.TRIGGER)
        ) or create_token_type == TokenType.TRIGGER:
            if is_constraint := (create_token_type == TokenType.CONSTRAINT):
                create_token = self._prev

            trigger_name = self._parse_id_var()
            if not trigger_name:
                return self._parse_as_command(start)

            timing_var = self._parse_var_from_options(self.TRIGGER_TIMING, raise_unmatched=False)
            timing = timing_var.this if timing_var else None
            if not timing:
                return self._parse_as_command(start)

            events = self._parse_trigger_events()
            if not self._match(TokenType.ON):
                self.raise_error("Expected ON in trigger definition")

            table = self._parse_table_parts()
            referenced_table = self._parse_table_parts() if self._match(TokenType.FROM) else None
            deferrable, initially = self._parse_trigger_deferrable()
            referencing = self._parse_trigger_referencing()
            for_each = self._parse_trigger_for_each()
            when = self._match_text_seq("WHEN") and self._parse_wrapped(
                self._parse_disjunction, optional=True
            )
            execute = self._parse_trigger_execute()

            if execute is None:
                return self._parse_as_command(start)

            trigger_props = self.expression(
                exp.TriggerProperties(
                    table=table,
                    timing=timing,
                    events=events,
                    execute=execute,
                    constraint=is_constraint,
                    referenced_table=referenced_table,
                    deferrable=deferrable,
                    initially=initially,
                    referencing=referencing,
                    for_each=for_each,
                    when=when,
                )
            )

            this = trigger_name
            extend_props(exp.Properties(expressions=[trigger_props] if trigger_props else []))
        elif create_token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                props = self._parse_properties()
                if props:
                    # Fold all parsed SequenceProperties into a single node
                    sequence_props = exp.SequenceProperties()
                    options = []
                    for prop in props:
                        if isinstance(prop, exp.SequenceProperties):
                            for arg, value in prop.args.items():
                                if arg == "options":
                                    options.extend(value)
                                else:
                                    sequence_props.set(arg, value)
                            prop.pop()

                    if options:
                        sequence_props.set("options", options)

                    props.append("expressions", sequence_props)
                    extend_props(props)
            else:
                expression = self._parse_ddl_select()

            # Some dialects also support using a table as an alias instead of a SELECT.
            # Here we fallback to this as an alternative.
2519 if not expression and has_alias: 2520 expression = self._try_parse(self._parse_table_parts) 2521 2522 if create_token_type == TokenType.TABLE: 2523 # exp.Properties.Location.POST_EXPRESSION 2524 extend_props(self._parse_properties()) 2525 2526 indexes = [] 2527 while True: 2528 index = self._parse_index() 2529 2530 # exp.Properties.Location.POST_INDEX 2531 extend_props(self._parse_properties()) 2532 if not index: 2533 break 2534 else: 2535 self._match(TokenType.COMMA) 2536 indexes.append(index) 2537 elif create_token_type == TokenType.VIEW: 2538 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2539 no_schema_binding = True 2540 elif create_token_type in (TokenType.SINK, TokenType.SOURCE): 2541 extend_props(self._parse_properties()) 2542 2543 shallow = self._match_text_seq("SHALLOW") 2544 2545 if self._match_texts(self.CLONE_KEYWORDS): 2546 copy = self._prev.text.lower() == "copy" 2547 clone = self.expression( 2548 exp.Clone(this=self._parse_table(schema=True), shallow=shallow, copy=copy) 2549 ) 2550 2551 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2552 return self._parse_as_command(start) 2553 2554 create_kind_text = create_token.text.upper() 2555 return self.expression( 2556 exp.Create( 2557 this=this, 2558 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2559 replace=replace, 2560 refresh=refresh, 2561 unique=unique, 2562 expression=expression, 2563 exists=exists, 2564 properties=properties, 2565 indexes=indexes, 2566 no_schema_binding=no_schema_binding, 2567 begin=begin, 2568 clone=clone, 2569 concurrently=concurrently, 2570 clustered=clustered, 2571 ) 2572 ) 2573 2574 def _parse_sequence_properties(self) -> exp.SequenceProperties | None: 2575 seq = exp.SequenceProperties() 2576 2577 options = [] 2578 index = self._index 2579 2580 while self._curr: 2581 self._match(TokenType.COMMA) 2582 if self._match_text_seq("INCREMENT"): 2583 self._match_text_seq("BY") 2584 
self._match_text_seq("=") 2585 seq.set("increment", self._parse_term()) 2586 elif self._match_text_seq("MINVALUE"): 2587 seq.set("minvalue", self._parse_term()) 2588 elif self._match_text_seq("MAXVALUE"): 2589 seq.set("maxvalue", self._parse_term()) 2590 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2591 self._match_text_seq("=") 2592 seq.set("start", self._parse_term()) 2593 elif self._match_text_seq("CACHE"): 2594 # T-SQL allows empty CACHE which is initialized dynamically 2595 seq.set("cache", self._parse_number() or True) 2596 elif self._match_text_seq("OWNED", "BY"): 2597 # "OWNED BY NONE" is the default 2598 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2599 else: 2600 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2601 if opt: 2602 options.append(opt) 2603 else: 2604 break 2605 2606 seq.set("options", options if options else None) 2607 return None if self._index == index else seq 2608 2609 def _parse_trigger_events(self) -> list[exp.TriggerEvent]: 2610 events = [] 2611 2612 while True: 2613 event_type = self._match_set(self.TRIGGER_EVENTS) and self._prev.text.upper() 2614 2615 if not event_type: 2616 self.raise_error("Expected trigger event (INSERT, UPDATE, DELETE, TRUNCATE)") 2617 2618 columns = ( 2619 self._parse_csv(self._parse_column) 2620 if event_type == "UPDATE" and self._match_text_seq("OF") 2621 else None 2622 ) 2623 2624 events.append(self.expression(exp.TriggerEvent(this=event_type, columns=columns))) 2625 2626 if not self._match(TokenType.OR): 2627 break 2628 2629 return events 2630 2631 def _parse_trigger_deferrable( 2632 self, 2633 ) -> tuple[str | None, str | None]: 2634 deferrable_var = self._parse_var_from_options( 2635 self.TRIGGER_DEFERRABLE, raise_unmatched=False 2636 ) 2637 deferrable = deferrable_var.this if deferrable_var else None 2638 2639 initially = None 2640 if deferrable and self._match_text_seq("INITIALLY"): 2641 initially = ( 2642 
                self._prev.text.upper() if self._match_texts(("IMMEDIATE", "DEFERRED")) else None
            )

        return deferrable, initially

    def _parse_trigger_referencing_clause(self, keyword: str) -> exp.Expr | None:
        """Parse one `<OLD|NEW> TABLE [AS] <alias>` clause; returns the alias identifier."""
        if not self._match_text_seq(keyword):
            return None
        if not self._match_text_seq("TABLE"):
            self.raise_error(f"Expected TABLE after {keyword} in REFERENCING clause")
        self._match_text_seq("AS")
        return self._parse_id_var()

    def _parse_trigger_referencing(self) -> exp.TriggerReferencing | None:
        """Parse a trigger REFERENCING clause with OLD/NEW transition-table aliases."""
        if not self._match_text_seq("REFERENCING"):
            return None

        old_alias = None
        new_alias = None

        while True:
            if alias := self._parse_trigger_referencing_clause("OLD"):
                if old_alias is not None:
                    self.raise_error("Duplicate OLD clause in REFERENCING")
                old_alias = alias
            elif alias := self._parse_trigger_referencing_clause("NEW"):
                if new_alias is not None:
                    self.raise_error("Duplicate NEW clause in REFERENCING")
                new_alias = alias
            else:
                break

        if old_alias is None and new_alias is None:
            self.raise_error("REFERENCING clause requires at least OLD TABLE or NEW TABLE")

        return self.expression(exp.TriggerReferencing(old=old_alias, new=new_alias))

    def _parse_trigger_for_each(self) -> str | None:
        """Parse FOR EACH ROW/STATEMENT; returns the granularity keyword or None."""
        if not self._match_text_seq("FOR", "EACH"):
            return None

        return self._prev.text.upper() if self._match_texts(("ROW", "STATEMENT")) else None

    def _parse_trigger_execute(self) -> exp.TriggerExecute | None:
        """Parse EXECUTE FUNCTION/PROCEDURE <call> at the end of a trigger definition."""
        if not self._match(TokenType.EXECUTE):
            return None

        if not self._match_set((TokenType.FUNCTION, TokenType.PROCEDURE)):
            self.raise_error("Expected FUNCTION or PROCEDURE after EXECUTE")

        func_call = self._parse_column()
        return self.expression(exp.TriggerExecute(this=func_call))

    def _parse_property_before(self) -> exp.Expr | list[exp.Expr] | None:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Optional modifier keywords that may precede the property name
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only pass the modifiers that were actually matched
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> list[exp.Expr | list[exp.Expr]]:
        """Parse a parenthesized, comma-separated property list."""
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> exp.Expr | list[exp.Expr] | None:
        """Parse a single table/DDL property, trying registered parsers before the
        generic `key = value` form."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("PARAMETER", "STYLE", "PANDAS"):
            return self.expression(exp.ParameterStyleProperty(this="PANDAS"))

        index = self._index

        seq_props = self._parse_sequence_properties()
        if seq_props:
            return seq_props

        self._retreat(index)
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property(this=key, value=value))

    def _parse_stored(self) -> exp.FileFormatProperty | exp.StorageHandlerProperty:
        """Parse STORED BY <handler> or STORED AS <format> (Hive-style)."""
        if self._match_text_seq("BY"):
            return self.expression(exp.StorageHandlerProperty(this=self._parse_var_or_string()))

        self._match(TokenType.ALIAS)
        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty(
                this=(
                    self.expression(
                        exp.InputOutputFormat(
                            input_format=input_format, output_format=output_format
                        )
                    )
                    if input_format or output_format
                    else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
                ),
                hive_format=True,
            )
        )

    def _parse_unquoted_field(self) -> exp.Expr | None:
        """Parse a field, normalizing an unquoted identifier into an exp.Var."""
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: type[E], **kwargs: t.Any) -> E:
        """Parse `[=|AS] <value>` into an instance of `exp_class`."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class(this=self._parse_unquoted_field(), **kwargs))

    def _parse_properties(self, before: bool | None = None) -> exp.Properties | None:
        """Parse consecutive properties into a single exp.Properties node, or None."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
2810 return self.expression(exp.Properties(expressions=properties)) 2811 2812 return None 2813 2814 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2815 return self.expression( 2816 exp.FallbackProperty(no=no, protection=self._match_text_seq("PROTECTION")) 2817 ) 2818 2819 def _parse_sql_security(self) -> exp.SqlSecurityProperty: 2820 return self.expression( 2821 exp.SqlSecurityProperty( 2822 this=self._match_texts(self.SECURITY_PROPERTY_KEYWORDS) and self._prev.text.upper() 2823 ) 2824 ) 2825 2826 def _parse_settings_property(self) -> exp.SettingsProperty: 2827 return self.expression( 2828 exp.SettingsProperty(expressions=self._parse_csv(self._parse_assignment)) 2829 ) 2830 2831 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2832 if self._index >= 2: 2833 pre_volatile_token = self._tokens[self._index - 2] 2834 else: 2835 pre_volatile_token = None 2836 2837 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2838 return exp.VolatileProperty() 2839 2840 return self.expression(exp.StabilityProperty(this=exp.Literal.string("VOLATILE"))) 2841 2842 def _parse_retention_period(self) -> exp.Var: 2843 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2844 number = self._parse_number() 2845 number_str = f"{number} " if number else "" 2846 unit = self._parse_var(any_token=True) 2847 return exp.var(f"{number_str}{unit}") 2848 2849 def _parse_system_versioning_property( 2850 self, with_: bool = False 2851 ) -> exp.WithSystemVersioningProperty: 2852 self._match(TokenType.EQ) 2853 prop = self.expression(exp.WithSystemVersioningProperty(on=True, with_=with_)) 2854 2855 if self._match_text_seq("OFF"): 2856 prop.set("on", False) 2857 return prop 2858 2859 self._match(TokenType.ON) 2860 if self._match(TokenType.L_PAREN): 2861 while self._curr and not self._match(TokenType.R_PAREN): 2862 if self._match_text_seq("HISTORY_TABLE", "="): 2863 prop.set("this", 
self._parse_table_parts()) 2864 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2865 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2866 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2867 prop.set("retention_period", self._parse_retention_period()) 2868 2869 self._match(TokenType.COMMA) 2870 2871 return prop 2872 2873 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2874 self._match(TokenType.EQ) 2875 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2876 prop = self.expression(exp.DataDeletionProperty(on=on)) 2877 2878 if self._match(TokenType.L_PAREN): 2879 while self._curr and not self._match(TokenType.R_PAREN): 2880 if self._match_text_seq("FILTER_COLUMN", "="): 2881 prop.set("filter_column", self._parse_column()) 2882 elif self._match_text_seq("RETENTION_PERIOD", "="): 2883 prop.set("retention_period", self._parse_retention_period()) 2884 2885 self._match(TokenType.COMMA) 2886 2887 return prop 2888 2889 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2890 kind = "HASH" 2891 expressions: list[exp.Expr] | None = None 2892 if self._match_text_seq("BY", "HASH"): 2893 expressions = self._parse_wrapped_csv(self._parse_id_var) 2894 elif self._match_text_seq("BY", "RANDOM"): 2895 kind = "RANDOM" 2896 2897 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2898 buckets: exp.Expr | None = None 2899 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2900 buckets = self._parse_number() 2901 2902 return self.expression( 2903 exp.DistributedByProperty( 2904 expressions=expressions, kind=kind, buckets=buckets, order=self._parse_order() 2905 ) 2906 ) 2907 2908 def _parse_composite_key_property(self, expr_type: type[E]) -> E: 2909 self._match_text_seq("KEY") 2910 expressions = self._parse_wrapped_id_vars() 2911 return self.expression(expr_type(expressions=expressions)) 2912 2913 def _parse_with_property(self) -> exp.Expr | 
None | list[exp.Expr]: 2914 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2915 prop = self._parse_system_versioning_property(with_=True) 2916 self._match_r_paren() 2917 return prop 2918 2919 if self._match(TokenType.L_PAREN, advance=False): 2920 result: list[exp.Expr] = [] 2921 for i in self._parse_wrapped_properties(): 2922 result.extend(i) if isinstance(i, list) else result.append(i) 2923 return result 2924 2925 if self._match_text_seq("JOURNAL"): 2926 return self._parse_withjournaltable() 2927 2928 if self._match_texts(self.VIEW_ATTRIBUTES): 2929 return self.expression(exp.ViewAttributeProperty(this=self._prev.text.upper())) 2930 2931 if self._match_text_seq("DATA"): 2932 return self._parse_withdata(no=False) 2933 elif self._match_text_seq("NO", "DATA"): 2934 return self._parse_withdata(no=True) 2935 2936 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2937 return self._parse_serde_properties(with_=True) 2938 2939 if self._match(TokenType.SCHEMA): 2940 return self.expression( 2941 exp.WithSchemaBindingProperty( 2942 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS) 2943 ) 2944 ) 2945 2946 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2947 return self.expression( 2948 exp.WithProcedureOptions(expressions=self._parse_csv(self._parse_procedure_option)) 2949 ) 2950 2951 if not self._next: 2952 return None 2953 2954 return self._parse_withisolatedloading() 2955 2956 def _parse_procedure_option(self) -> exp.Expr | None: 2957 if self._match_text_seq("EXECUTE", "AS"): 2958 return self.expression( 2959 exp.ExecuteAsProperty( 2960 this=self._parse_var_from_options( 2961 self.EXECUTE_AS_OPTIONS, raise_unmatched=False 2962 ) 2963 or self._parse_string() 2964 ) 2965 ) 2966 2967 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2968 2969 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2970 def _parse_definer(self) -> exp.DefinerProperty | None: 2971 self._match(TokenType.EQ) 2972 2973 user = self._parse_id_var() 
2974 self._match(TokenType.PARAMETER) 2975 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2976 2977 if not user or not host: 2978 return None 2979 2980 return exp.DefinerProperty(this=f"{user}@{host}") 2981 2982 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2983 self._match(TokenType.TABLE) 2984 self._match(TokenType.EQ) 2985 return self.expression(exp.WithJournalTableProperty(this=self._parse_table_parts())) 2986 2987 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2988 return self.expression(exp.LogProperty(no=no)) 2989 2990 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2991 return self.expression(exp.JournalProperty(**kwargs)) 2992 2993 def _parse_checksum(self) -> exp.ChecksumProperty: 2994 self._match(TokenType.EQ) 2995 2996 on = None 2997 if self._match(TokenType.ON): 2998 on = True 2999 elif self._match_text_seq("OFF"): 3000 on = False 3001 3002 return self.expression(exp.ChecksumProperty(on=on, default=self._match(TokenType.DEFAULT))) 3003 3004 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 3005 return self.expression( 3006 exp.Cluster( 3007 expressions=( 3008 self._parse_wrapped_csv(self._parse_ordered) 3009 if wrapped 3010 else self._parse_csv(self._parse_ordered) 3011 ) 3012 ) 3013 ) 3014 3015 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 3016 self._match_text_seq("BY") 3017 3018 self._match_l_paren() 3019 expressions = self._parse_csv(self._parse_column) 3020 self._match_r_paren() 3021 3022 if self._match_text_seq("SORTED", "BY"): 3023 self._match_l_paren() 3024 sorted_by = self._parse_csv(self._parse_ordered) 3025 self._match_r_paren() 3026 else: 3027 sorted_by = None 3028 3029 self._match(TokenType.INTO) 3030 buckets = self._parse_number() 3031 self._match_text_seq("BUCKETS") 3032 3033 return self.expression( 3034 exp.ClusteredByProperty(expressions=expressions, sorted_by=sorted_by, buckets=buckets) 3035 ) 3036 3037 def _parse_copy_property(self) 
-> exp.CopyGrantsProperty | None: 3038 if not self._match_text_seq("GRANTS"): 3039 self._retreat(self._index - 1) 3040 return None 3041 3042 return self.expression(exp.CopyGrantsProperty()) 3043 3044 def _parse_freespace(self) -> exp.FreespaceProperty: 3045 self._match(TokenType.EQ) 3046 return self.expression( 3047 exp.FreespaceProperty(this=self._parse_number(), percent=self._match(TokenType.PERCENT)) 3048 ) 3049 3050 def _parse_mergeblockratio( 3051 self, no: bool = False, default: bool = False 3052 ) -> exp.MergeBlockRatioProperty: 3053 if self._match(TokenType.EQ): 3054 return self.expression( 3055 exp.MergeBlockRatioProperty( 3056 this=self._parse_number(), percent=self._match(TokenType.PERCENT) 3057 ) 3058 ) 3059 3060 return self.expression(exp.MergeBlockRatioProperty(no=no, default=default)) 3061 3062 def _parse_datablocksize( 3063 self, 3064 default: bool | None = None, 3065 minimum: bool | None = None, 3066 maximum: bool | None = None, 3067 ) -> exp.DataBlocksizeProperty: 3068 self._match(TokenType.EQ) 3069 size = self._parse_number() 3070 3071 units = None 3072 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 3073 units = self._prev.text 3074 3075 return self.expression( 3076 exp.DataBlocksizeProperty( 3077 size=size, units=units, default=default, minimum=minimum, maximum=maximum 3078 ) 3079 ) 3080 3081 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 3082 self._match(TokenType.EQ) 3083 always = self._match_text_seq("ALWAYS") 3084 manual = self._match_text_seq("MANUAL") 3085 never = self._match_text_seq("NEVER") 3086 default = self._match_text_seq("DEFAULT") 3087 3088 autotemp = None 3089 if self._match_text_seq("AUTOTEMP"): 3090 autotemp = self._parse_schema() 3091 3092 return self.expression( 3093 exp.BlockCompressionProperty( 3094 always=always, manual=manual, never=never, default=default, autotemp=autotemp 3095 ) 3096 ) 3097 3098 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty | None: 3099 index = 
self._index 3100 no = self._match_text_seq("NO") 3101 concurrent = self._match_text_seq("CONCURRENT") 3102 3103 if not self._match_text_seq("ISOLATED", "LOADING"): 3104 self._retreat(index) 3105 return None 3106 3107 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 3108 return self.expression( 3109 exp.IsolatedLoadingProperty(no=no, concurrent=concurrent, target=target) 3110 ) 3111 3112 def _parse_locking(self) -> exp.LockingProperty: 3113 if self._match(TokenType.TABLE): 3114 kind = "TABLE" 3115 elif self._match(TokenType.VIEW): 3116 kind = "VIEW" 3117 elif self._match(TokenType.ROW): 3118 kind = "ROW" 3119 elif self._match_text_seq("DATABASE"): 3120 kind = "DATABASE" 3121 else: 3122 kind = None 3123 3124 if kind in ("DATABASE", "TABLE", "VIEW"): 3125 this = self._parse_table_parts() 3126 else: 3127 this = None 3128 3129 if self._match(TokenType.FOR): 3130 for_or_in = "FOR" 3131 elif self._match(TokenType.IN): 3132 for_or_in = "IN" 3133 else: 3134 for_or_in = None 3135 3136 if self._match_text_seq("ACCESS"): 3137 lock_type = "ACCESS" 3138 elif self._match_texts(("EXCL", "EXCLUSIVE")): 3139 lock_type = "EXCLUSIVE" 3140 elif self._match_text_seq("SHARE"): 3141 lock_type = "SHARE" 3142 elif self._match_text_seq("READ"): 3143 lock_type = "READ" 3144 elif self._match_text_seq("WRITE"): 3145 lock_type = "WRITE" 3146 elif self._match_text_seq("CHECKSUM"): 3147 lock_type = "CHECKSUM" 3148 else: 3149 lock_type = None 3150 3151 override = self._match_text_seq("OVERRIDE") 3152 3153 return self.expression( 3154 exp.LockingProperty( 3155 this=this, kind=kind, for_or_in=for_or_in, lock_type=lock_type, override=override 3156 ) 3157 ) 3158 3159 def _parse_partition_by(self) -> list[exp.Expr]: 3160 if self._match(TokenType.PARTITION_BY): 3161 return self._parse_csv(self._parse_disjunction) 3162 return [] 3163 3164 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 3165 def _parse_partition_bound_expr() -> exp.Expr | None: 
3166 if self._match_text_seq("MINVALUE"): 3167 return exp.var("MINVALUE") 3168 if self._match_text_seq("MAXVALUE"): 3169 return exp.var("MAXVALUE") 3170 return self._parse_bitwise() 3171 3172 this: exp.Expr | list[exp.Expr] | None = None 3173 expression = None 3174 from_expressions = None 3175 to_expressions = None 3176 3177 if self._match(TokenType.IN): 3178 this = self._parse_wrapped_csv(self._parse_bitwise) 3179 elif self._match(TokenType.FROM): 3180 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 3181 self._match_text_seq("TO") 3182 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 3183 elif self._match_text_seq("WITH", "(", "MODULUS"): 3184 this = self._parse_number() 3185 self._match_text_seq(",", "REMAINDER") 3186 expression = self._parse_number() 3187 self._match_r_paren() 3188 else: 3189 self.raise_error("Failed to parse partition bound spec.") 3190 3191 return self.expression( 3192 exp.PartitionBoundSpec( 3193 this=this, 3194 expression=expression, 3195 from_expressions=from_expressions, 3196 to_expressions=to_expressions, 3197 ) 3198 ) 3199 3200 # https://www.postgresql.org/docs/current/sql-createtable.html 3201 def _parse_partitioned_of(self) -> exp.PartitionedOfProperty | None: 3202 if not self._match_text_seq("OF"): 3203 self._retreat(self._index - 1) 3204 return None 3205 3206 this = self._parse_table(schema=True) 3207 3208 if self._match(TokenType.DEFAULT): 3209 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 3210 elif self._match_text_seq("FOR", "VALUES"): 3211 expression = self._parse_partition_bound_spec() 3212 else: 3213 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 3214 3215 return self.expression(exp.PartitionedOfProperty(this=this, expression=expression)) 3216 3217 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 3218 self._match(TokenType.EQ) 3219 return self.expression( 3220 exp.PartitionedByProperty( 3221 this=self._parse_schema() or 
self._parse_bracket(self._parse_field()) 3222 ) 3223 ) 3224 3225 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 3226 if self._match_text_seq("AND", "STATISTICS"): 3227 statistics = True 3228 elif self._match_text_seq("AND", "NO", "STATISTICS"): 3229 statistics = False 3230 else: 3231 statistics = None 3232 3233 return self.expression(exp.WithDataProperty(no=no, statistics=statistics)) 3234 3235 def _parse_contains_property(self) -> exp.SqlReadWriteProperty | None: 3236 if self._match_text_seq("SQL"): 3237 return self.expression(exp.SqlReadWriteProperty(this="CONTAINS SQL")) 3238 return None 3239 3240 def _parse_modifies_property(self) -> exp.SqlReadWriteProperty | None: 3241 if self._match_text_seq("SQL", "DATA"): 3242 return self.expression(exp.SqlReadWriteProperty(this="MODIFIES SQL DATA")) 3243 return None 3244 3245 def _parse_no_property(self) -> exp.Expr | None: 3246 if self._match_text_seq("PRIMARY", "INDEX"): 3247 return exp.NoPrimaryIndexProperty() 3248 if self._match_text_seq("SQL"): 3249 return self.expression(exp.SqlReadWriteProperty(this="NO SQL")) 3250 return None 3251 3252 def _parse_on_property(self) -> exp.Expr | None: 3253 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 3254 return exp.OnCommitProperty() 3255 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 3256 return exp.OnCommitProperty(delete=True) 3257 return self.expression(exp.OnProperty(this=self._parse_schema(self._parse_id_var()))) 3258 3259 def _parse_reads_property(self) -> exp.SqlReadWriteProperty | None: 3260 if self._match_text_seq("SQL", "DATA"): 3261 return self.expression(exp.SqlReadWriteProperty(this="READS SQL DATA")) 3262 return None 3263 3264 def _parse_distkey(self) -> exp.DistKeyProperty: 3265 return self.expression(exp.DistKeyProperty(this=self._parse_wrapped(self._parse_id_var))) 3266 3267 def _parse_create_like(self) -> exp.LikeProperty | None: 3268 table = self._parse_table(schema=True) 3269 3270 options = [] 3271 while 
self._match_texts(("INCLUDING", "EXCLUDING")): 3272 this = self._prev.text.upper() 3273 3274 id_var = self._parse_id_var() 3275 if not id_var: 3276 return None 3277 3278 options.append( 3279 self.expression(exp.Property(this=this, value=exp.var(id_var.this.upper()))) 3280 ) 3281 3282 return self.expression(exp.LikeProperty(this=table, expressions=options)) 3283 3284 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 3285 return self.expression( 3286 exp.SortKeyProperty(this=self._parse_wrapped_id_vars(), compound=compound) 3287 ) 3288 3289 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 3290 self._match(TokenType.EQ) 3291 return self.expression( 3292 exp.CharacterSetProperty(this=self._parse_var_or_string(), default=default) 3293 ) 3294 3295 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 3296 self._match_text_seq("WITH", "CONNECTION") 3297 return self.expression( 3298 exp.RemoteWithConnectionModelProperty(this=self._parse_table_parts()) 3299 ) 3300 3301 def _parse_returns(self) -> exp.ReturnsProperty: 3302 value: exp.Expr | None 3303 null = None 3304 is_table = self._match(TokenType.TABLE) 3305 3306 if is_table: 3307 if self._match(TokenType.LT): 3308 value = self.expression( 3309 exp.Schema(this="TABLE", expressions=self._parse_csv(self._parse_struct_types)) 3310 ) 3311 if not self._match(TokenType.GT): 3312 self.raise_error("Expecting >") 3313 else: 3314 value = self._parse_schema(exp.var("TABLE")) 3315 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 3316 null = True 3317 value = None 3318 else: 3319 value = self._parse_types() 3320 3321 return self.expression(exp.ReturnsProperty(this=value, is_table=is_table, null=null)) 3322 3323 def _parse_describe(self) -> exp.Describe: 3324 kind = self._prev.text if self._match_set(self.CREATABLES) else None 3325 style: str | None = ( 3326 self._prev.text.upper() if self._match_texts(self.DESCRIBE_STYLES) else None 3327 ) 
    def _parse_multitable_inserts(self, comments: list[str] | None) -> exp.MultitableInserts:
        """Parse Oracle-style multitable INSERT {FIRST|ALL}: a sequence of
        [WHEN <cond> THEN] [ELSE] INTO <table> [VALUES ...] branches followed by
        the source query."""
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> exp.ConditionalInsert | None:
            # One branch: optional WHEN ... THEN, optional ELSE, mandatory INTO.
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert(
                    this=self.expression(
                        exp.Insert(
                            this=self._parse_table(schema=True),
                            expression=self._parse_derived_table_values(),
                        )
                    ),
                    expression=expression,
                    else_=else_,
                )
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts(kind=kind, expressions=expressions, source=self._parse_table()),
            comments=comments,
        )

    def _parse_insert(self) -> exp.Insert | exp.MultitableInserts:
        """Parse an INSERT statement (including INSERT OVERWRITE DIRECTORY and
        multitable inserts).

        NOTE: the keyword-argument order in the final ``exp.Insert`` call is
        significant -- each ``self._match*`` consumes tokens in clause order.
        """
        comments: list[str] = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # Hive INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: exp.Expr | None = self.expression(
                exp.Directory(
                    this=self._parse_var_or_string(),
                    local=local,
                    row_format=self._parse_row_format(match_row=True),
                )
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                # e.g. SQLite INSERT OR REPLACE / IGNORE / ...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = self._parse_function() if is_function else self._parse_insert_table()

        returning = self._parse_returning()  # TSQL allows RETURNING before source

        return self.expression(
            exp.Insert(
                hint=hint,
                is_function=is_function,
                this=this,
                stored=self._match_text_seq("STORED") and self._parse_stored(),
                by_name=self._match_text_seq("BY", "NAME"),
                exists=self._parse_exists(),
                where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
                and self._parse_disjunction(),
                partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
                settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
                default=self._match_text_seq("DEFAULT", "VALUES"),
                expression=self._parse_derived_table_values() or self._parse_ddl_select(),
                conflict=self._parse_on_conflict(),
                returning=returning or self._parse_returning(),
                overwrite=overwrite,
                alternative=alternative,
                ignore=ignore,
                source=self._match(TokenType.TABLE) and self._parse_table(),
            ),
            comments=comments,
        )
3451 def _parse_insert_table(self) -> exp.Expr | None: 3452 this = self._parse_table(schema=True, parse_partition=True) 3453 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 3454 this.set("alias", self._parse_table_alias()) 3455 return this 3456 3457 def _parse_kill(self) -> exp.Kill: 3458 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 3459 3460 return self.expression(exp.Kill(this=self._parse_primary(), kind=kind)) 3461 3462 def _parse_on_conflict(self) -> exp.OnConflict | None: 3463 conflict = self._match_text_seq("ON", "CONFLICT") 3464 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 3465 3466 if not conflict and not duplicate: 3467 return None 3468 3469 conflict_keys = None 3470 constraint = None 3471 3472 if conflict: 3473 if self._match_text_seq("ON", "CONSTRAINT"): 3474 constraint = self._parse_id_var() 3475 elif self._match(TokenType.L_PAREN): 3476 conflict_keys = self._parse_csv(self._parse_id_var) 3477 self._match_r_paren() 3478 3479 index_predicate = self._parse_where() 3480 3481 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 3482 if self._prev.token_type == TokenType.UPDATE: 3483 self._match(TokenType.SET) 3484 expressions = self._parse_csv(self._parse_equality) 3485 else: 3486 expressions = None 3487 3488 return self.expression( 3489 exp.OnConflict( 3490 duplicate=duplicate, 3491 expressions=expressions, 3492 action=action, 3493 conflict_keys=conflict_keys, 3494 index_predicate=index_predicate, 3495 constraint=constraint, 3496 where=self._parse_where(), 3497 ) 3498 ) 3499 3500 def _parse_returning(self) -> exp.Returning | None: 3501 if not self._match(TokenType.RETURNING): 3502 return None 3503 return self.expression( 3504 exp.Returning( 3505 expressions=self._parse_csv(self._parse_expression), 3506 into=self._match(TokenType.INTO) and self._parse_table_part(), 3507 ) 3508 ) 3509 3510 def _parse_row(self) -> exp.RowFormatSerdeProperty | 
exp.RowFormatDelimitedProperty | None: 3511 if not self._match(TokenType.FORMAT): 3512 return None 3513 return self._parse_row_format() 3514 3515 def _parse_serde_properties(self, with_: bool = False) -> exp.SerdeProperties | None: 3516 index = self._index 3517 with_ = with_ or self._match_text_seq("WITH") 3518 3519 if not self._match(TokenType.SERDE_PROPERTIES): 3520 self._retreat(index) 3521 return None 3522 return self.expression( 3523 exp.SerdeProperties(expressions=self._parse_wrapped_properties(), with_=with_) 3524 ) 3525 3526 def _parse_row_format( 3527 self, match_row: bool = False 3528 ) -> exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty | None: 3529 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3530 return None 3531 3532 if self._match_text_seq("SERDE"): 3533 this = self._parse_string() 3534 3535 serde_properties = self._parse_serde_properties() 3536 3537 return self.expression( 3538 exp.RowFormatSerdeProperty(this=this, serde_properties=serde_properties) 3539 ) 3540 3541 self._match_text_seq("DELIMITED") 3542 3543 kwargs = {} 3544 3545 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3546 kwargs["fields"] = self._parse_string() 3547 if self._match_text_seq("ESCAPED", "BY"): 3548 kwargs["escaped"] = self._parse_string() 3549 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3550 kwargs["collection_items"] = self._parse_string() 3551 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3552 kwargs["map_keys"] = self._parse_string() 3553 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3554 kwargs["lines"] = self._parse_string() 3555 if self._match_text_seq("NULL", "DEFINED", "AS"): 3556 kwargs["null"] = self._parse_string() 3557 3558 return self.expression(exp.RowFormatDelimitedProperty(**kwargs)) # type: ignore 3559 3560 def _parse_load(self) -> exp.LoadData | exp.Command: 3561 if self._match_text_seq("DATA"): 3562 local = self._match_text_seq("LOCAL") 3563 
self._match_text_seq("INPATH") 3564 inpath = self._parse_string() 3565 overwrite = self._match(TokenType.OVERWRITE) 3566 self._match_pair(TokenType.INTO, TokenType.TABLE) 3567 3568 return self.expression( 3569 exp.LoadData( 3570 this=self._parse_table(schema=True), 3571 local=local, 3572 overwrite=overwrite, 3573 inpath=inpath, 3574 files=self._match_text_seq("FROM", "FILES") 3575 and exp.Properties(expressions=self._parse_wrapped_properties()), 3576 partition=self._parse_partition(), 3577 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3578 serde=self._match_text_seq("SERDE") and self._parse_string(), 3579 ) 3580 ) 3581 return self._parse_as_command(self._prev) 3582 3583 def _parse_delete(self) -> exp.Delete: 3584 hint = self._parse_hint() 3585 3586 # This handles MySQL's "Multiple-Table Syntax" 3587 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3588 tables = None 3589 if not self._match(TokenType.FROM, advance=False): 3590 tables = self._parse_csv(self._parse_table) or None 3591 3592 returning = self._parse_returning() 3593 3594 return self.expression( 3595 exp.Delete( 3596 hint=hint, 3597 tables=tables, 3598 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3599 using=self._match(TokenType.USING) 3600 and self._parse_csv(lambda: self._parse_table(joins=True)), 3601 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3602 where=self._parse_where(), 3603 returning=returning or self._parse_returning(), 3604 order=self._parse_order(), 3605 limit=self._parse_limit(), 3606 ) 3607 ) 3608 3609 def _parse_update(self) -> exp.Update: 3610 hint = self._parse_hint() 3611 kwargs: dict[str, object] = { 3612 "hint": hint, 3613 "this": self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS), 3614 } 3615 while self._curr: 3616 if self._match(TokenType.SET): 3617 kwargs["expressions"] = self._parse_csv(self._parse_equality) 3618 elif self._match(TokenType.RETURNING, advance=False): 3619 
kwargs["returning"] = self._parse_returning() 3620 elif self._match(TokenType.FROM, advance=False): 3621 from_ = self._parse_from(joins=True) 3622 table = from_.this if from_ else None 3623 if isinstance(table, exp.Subquery) and self._match(TokenType.JOIN, advance=False): 3624 table.set("joins", list(self._parse_joins()) or None) 3625 3626 kwargs["from_"] = from_ 3627 elif self._match(TokenType.WHERE, advance=False): 3628 kwargs["where"] = self._parse_where() 3629 elif self._match(TokenType.ORDER_BY, advance=False): 3630 kwargs["order"] = self._parse_order() 3631 elif self._match(TokenType.LIMIT, advance=False): 3632 kwargs["limit"] = self._parse_limit() 3633 else: 3634 break 3635 3636 return self.expression(exp.Update(**kwargs)) 3637 3638 def _parse_use(self) -> exp.Use: 3639 return self.expression( 3640 exp.Use( 3641 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3642 this=self._parse_table(schema=False), 3643 ) 3644 ) 3645 3646 def _parse_uncache(self) -> exp.Uncache: 3647 if not self._match(TokenType.TABLE): 3648 self.raise_error("Expecting TABLE after UNCACHE") 3649 3650 return self.expression( 3651 exp.Uncache(exists=self._parse_exists(), this=self._parse_table(schema=True)) 3652 ) 3653 3654 def _parse_cache(self) -> exp.Cache: 3655 lazy = self._match_text_seq("LAZY") 3656 self._match(TokenType.TABLE) 3657 table = self._parse_table(schema=True) 3658 3659 options = [] 3660 if self._match_text_seq("OPTIONS"): 3661 self._match_l_paren() 3662 k = self._parse_string() 3663 self._match(TokenType.EQ) 3664 v = self._parse_string() 3665 options = [k, v] 3666 self._match_r_paren() 3667 3668 self._match(TokenType.ALIAS) 3669 return self.expression( 3670 exp.Cache( 3671 this=table, lazy=lazy, options=options, expression=self._parse_select(nested=True) 3672 ) 3673 ) 3674 3675 def _parse_partition(self) -> exp.Partition | None: 3676 if not self._match_texts(self.PARTITION_KEYWORDS): 3677 return None 3678 3679 return self.expression( 3680 
exp.Partition( 3681 subpartition=self._prev.text.upper() == "SUBPARTITION", 3682 expressions=self._parse_wrapped_csv(self._parse_disjunction), 3683 ) 3684 ) 3685 3686 def _parse_value(self, values: bool = True) -> exp.Tuple | None: 3687 def _parse_value_expression() -> exp.Expr | None: 3688 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3689 return exp.var(self._prev.text.upper()) 3690 return self._parse_expression() 3691 3692 if self._match(TokenType.L_PAREN): 3693 expressions = self._parse_csv(_parse_value_expression) 3694 self._match_r_paren() 3695 return self.expression(exp.Tuple(expressions=expressions)) 3696 3697 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3698 expression = self._parse_expression() 3699 if expression: 3700 return self.expression(exp.Tuple(expressions=[expression])) 3701 return None 3702 3703 def _parse_projections( 3704 self, 3705 ) -> tuple[list[exp.Expr], list[exp.Expr] | None]: 3706 return self._parse_expressions(), None 3707 3708 def _parse_wrapped_select(self, table: bool = False) -> exp.Expr | None: 3709 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3710 this: exp.Expr | None = self._parse_simplified_pivot( 3711 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3712 ) 3713 elif self._match(TokenType.FROM): 3714 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3715 # Support parentheses for duckdb FROM-first syntax 3716 select = self._parse_select(from_=from_) 3717 if select: 3718 if not select.args.get("from_"): 3719 select.set("from_", from_) 3720 this = select 3721 else: 3722 this = exp.select("*").from_(t.cast(exp.From, from_)) 3723 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3724 else: 3725 this = ( 3726 self._parse_table(consume_pipe=True) 3727 if table 3728 else self._parse_select(nested=True, parse_set_operation=False) 3729 ) 3730 3731 # Transform exp.Values into a exp.Table to pass through 
    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
        consume_pipe: bool = True,
        from_: exp.From | None = None,
    ) -> exp.Expr | None:
        """Parse a SELECT-like query, then optionally fold a trailing pipe
        (``|>``) syntax chain into it."""
        query = self._parse_select_query(
            nested=nested,
            table=table,
            parse_subquery_alias=parse_subquery_alias,
            parse_set_operation=parse_set_operation,
        )

        if consume_pipe and self._match(TokenType.PIPE_GT, advance=False):
            # A bare FROM followed by |> implies SELECT * FROM ...
            if not query and from_:
                query = exp.select("*").from_(from_)
            if isinstance(query, exp.Query):
                query = self._parse_pipe_syntax_query(query)
                query = query.subquery(copy=False) if query and table else query

        return query

    def _parse_select_query(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> exp.Expr | None:
        """Parse the core of a query: WITH-prefixed statements, SELECT, wrapped
        subqueries, VALUES, FROM-first syntax, SUMMARIZE, or DESCRIBE."""
        cte = self._parse_with()

        if cte:
            # Attach the CTE to whatever statement follows it.
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            # Unwrap redundant subquery wrappers before attaching the WITH.
            while isinstance(this, exp.Subquery) and this.is_wrapper:
                this = this.this

            assert this is not None
            if "with_" in this.arg_types:
                this.set("with_", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = (
            self._parse_from(joins=True, consume_pipe=True)
            if self._match(TokenType.FROM, advance=False)
            else None
        )

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            # Guard against ALL/DISTINCT being a qualified column (e.g. all.x).
            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                matched_distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, matched_distinct = None, False

            # BigQuery SELECT AS STRUCT / AS VALUE
            kind = (
                self._prev.text.upper()
                if self._match(TokenType.ALIAS) and self._match_texts(("STRUCT", "VALUE"))
                else None
            )

            distinct: exp.Expr | None = (
                self.expression(
                    exp.Distinct(
                        on=self._parse_value(values=False) if self._match(TokenType.ON) else None
                    )
                )
                if matched_distinct
                else None
            )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections, exclude = self._parse_projections()

            this = self.expression(
                exp.Select(
                    kind=kind,
                    hint=hint,
                    distinct=distinct,
                    expressions=projections,
                    limit=limit,
                    exclude=exclude,
                    operation_modifiers=operation_modifiers or None,
                )
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from_", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            this = self._parse_wrapped_select(table=table)

            if this:
                this.add_comments(comments, prepend=True)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            # FROM-first syntax with no SELECT: implies SELECT *.
            this = exp.select("*").from_(from_.this, copy=False)
            this = self._parse_query_modifiers(this)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize(this=this, table=table))
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_recursive_with_search(self) -> exp.RecursiveWithSearch | None:
        """Parse SEARCH {BREADTH|DEPTH} FIRST BY <col> SET <col> [USING <col>]
        on a recursive CTE; None when the search kind is absent."""
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch(
                kind=kind,
                this=self._parse_id_var(),
                expression=self._match_text_seq("SET") and self._parse_id_var(),
                using=self._match_text_seq("USING") and self._parse_id_var(),
            )
        )
self.expression(
            exp.With(
                expressions=expressions,
                recursive=recursive or None,
                search=self._parse_recursive_with_search(),
            ),
            comments=comments,
        )

    def _parse_cte(self) -> exp.CTE | None:
        """Parse one CTE (`name [USING KEY (...)] AS (statement)`).

        Returns None (after rewinding the token stream) when the AS keyword
        is missing and the dialect does not allow omitting it.
        """
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        key_expressions = (
            self._parse_wrapped_id_vars() if self._match_text_seq("USING", "KEY") else None
        )

        # Backtrack entirely if AS is required by the dialect but absent.
        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        # Tri-state: True/False when (NOT) MATERIALIZED is written, else None.
        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE(
                this=self._parse_wrapped(self._parse_statement),
                alias=alias,
                materialized=materialized,
                key_expressions=key_expressions,
            ),
            comments=comments,
        )

        # Normalize a bare VALUES body into SELECT * FROM (VALUES ...) so the
        # CTE body is always a query; an alias is synthesized when missing.
        values = cte.this
        if isinstance(values, exp.Values):
            if values.alias:
                cte.set("this", exp.select("*").from_(values))
            else:
                cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True)))

        return cte

    def _parse_table_alias(
        self, alias_tokens: t.Collection[TokenType] | None = None
    ) -> exp.TableAlias | None:
        """Parse an optional table alias (with optional column list)."""
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
) 3997 3998 index = self._index 3999 if self._match(TokenType.L_PAREN): 4000 columns = self._parse_csv(self._parse_function_parameter) 4001 self._match_r_paren() if columns else self._retreat(index) 4002 else: 4003 columns = None 4004 4005 if not alias and not columns: 4006 return None 4007 4008 table_alias = self.expression(exp.TableAlias(this=alias, columns=columns)) 4009 4010 # We bubble up comments from the Identifier to the TableAlias 4011 if isinstance(alias, exp.Identifier): 4012 table_alias.add_comments(alias.pop_comments()) 4013 4014 return table_alias 4015 4016 def _parse_subquery( 4017 self, this: exp.Expr | None, parse_alias: bool = True 4018 ) -> exp.Subquery | None: 4019 if not this: 4020 return None 4021 4022 return self.expression( 4023 exp.Subquery( 4024 this=this, 4025 pivots=self._parse_pivots(), 4026 alias=self._parse_table_alias() if parse_alias else None, 4027 sample=self._parse_table_sample(), 4028 ) 4029 ) 4030 4031 def _implicit_unnests_to_explicit(self, this: E) -> E: 4032 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 4033 4034 refs = {_norm(this.args["from_"].this.copy(), dialect=self.dialect).alias_or_name} 4035 for i, join in enumerate(this.args.get("joins") or []): 4036 table = join.this 4037 normalized_table = table.copy() 4038 normalized_table.meta["maybe_column"] = True 4039 normalized_table = _norm(normalized_table, dialect=self.dialect) 4040 4041 if isinstance(table, exp.Table) and not join.args.get("on"): 4042 if normalized_table.parts[0].name in refs: 4043 table_as_column = table.to_column() 4044 unnest = exp.Unnest(expressions=[table_as_column]) 4045 4046 # Table.to_column creates a parent Alias node that we want to convert to 4047 # a TableAlias and attach to the Unnest, so it matches the parser's output 4048 if isinstance(table.args.get("alias"), exp.TableAlias): 4049 table_as_column.replace(table_as_column.this) 4050 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 
                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    @t.overload
    def _parse_query_modifiers(self, this: E) -> E: ...

    @t.overload
    def _parse_query_modifiers(self, this: None) -> None: ...

    def _parse_query_modifiers(self, this):
        """Attach trailing query modifiers (joins, laterals, WHERE/GROUP BY/
        ORDER BY/LIMIT etc., via QUERY_MODIFIER_PARSERS) to ``this``.

        No-op unless ``this`` is one of self.MODIFIABLES; returns ``this``.
        """
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    modifier_token = self._curr
                    parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type]
                    key, expression = parser(self)

                    if expression:
                        # Each modifier clause may appear at most once.
                        if this.args.get(key):
                            self.raise_error(
                                f"Found multiple '{modifier_token.text.upper()}' clauses",
                                token=modifier_token,
                            )

                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT that carried its own OFFSET (and BY
                            # expressions) is split into a sibling Offset node.
                            offset = expression.args.get("offset")
                            expression.set("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                    continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from_"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> exp.Hint | None:
        """Consume every remaining token and wrap the raw SQL text in a Hint."""
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> exp.Expr | None:
        # Hook point: dialects may override how hint function calls are parsed.
        return self._parse_function_call()

    def _parse_hint_body(self) -> exp.Hint | None:
        """Parse a hint's contents as function calls / vars, falling back to
        the raw-string form when that fails or tokens are left over."""
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            # Repeatedly parse comma-separated hint lists until an empty
            # batch is produced (iter sentinel is the empty list).
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        # Leftover tokens mean structured parsing didn't consume the whole
        # hint; rewind and keep it as an opaque string instead.
        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint(expressions=hints))

    def _parse_hint(self) -> exp.Hint | None:
        """Parse an optimizer hint delivered via the comment attached to a
        HINT token (e.g. `/*+ ... */`); None when no hint is present."""
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> exp.Into | None:
        """Parse `INTO [TEMPORARY|UNLOGGED] [TABLE] <table>`."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into(this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged)
        )

    def _parse_from(
        self,
        joins: bool = False,
        skip_from_token: bool = False,
        consume_pipe: bool = False,
    ) -> exp.From | None:
        """Parse a FROM clause into exp.From.

        skip_from_token: the FROM keyword was already consumed by the caller.
        joins / consume_pipe are forwarded to _parse_table.
        """
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        comments = self._prev_comments
        return self.expression(
            exp.From(this=self._parse_table(joins=joins, consume_pipe=consume_pipe)),
            comments=comments,
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        """Parse one MEASURES entry, with optional FINAL/RUNNING frame."""
        return self.expression(
            exp.MatchRecognizeMeasure(
                window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
                this=self._parse_expression(),
            )
        )

    def _parse_match_recognize(self) -> exp.MatchRecognize | None:
        """Parse a MATCH_RECOGNIZE(...) clause; None when absent."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order =
self._parse_order() 4186 4187 measures = ( 4188 self._parse_csv(self._parse_match_recognize_measure) 4189 if self._match_text_seq("MEASURES") 4190 else None 4191 ) 4192 4193 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 4194 rows = exp.var("ONE ROW PER MATCH") 4195 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 4196 text = "ALL ROWS PER MATCH" 4197 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 4198 text += " SHOW EMPTY MATCHES" 4199 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 4200 text += " OMIT EMPTY MATCHES" 4201 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 4202 text += " WITH UNMATCHED ROWS" 4203 rows = exp.var(text) 4204 else: 4205 rows = None 4206 4207 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 4208 text = "AFTER MATCH SKIP" 4209 if self._match_text_seq("PAST", "LAST", "ROW"): 4210 text += " PAST LAST ROW" 4211 elif self._match_text_seq("TO", "NEXT", "ROW"): 4212 text += " TO NEXT ROW" 4213 elif self._match_text_seq("TO", "FIRST"): 4214 text += f" TO FIRST {self._advance_any().text}" # type: ignore 4215 elif self._match_text_seq("TO", "LAST"): 4216 text += f" TO LAST {self._advance_any().text}" # type: ignore 4217 after = exp.var(text) 4218 else: 4219 after = None 4220 4221 if self._match_text_seq("PATTERN"): 4222 self._match_l_paren() 4223 4224 if not self._curr: 4225 self.raise_error("Expecting )", self._curr) 4226 4227 paren = 1 4228 start = self._curr 4229 4230 while self._curr and paren > 0: 4231 if self._curr.token_type == TokenType.L_PAREN: 4232 paren += 1 4233 if self._curr.token_type == TokenType.R_PAREN: 4234 paren -= 1 4235 4236 end = self._prev 4237 self._advance() 4238 4239 if paren > 0: 4240 self.raise_error("Expecting )", self._curr) 4241 4242 pattern = exp.var(self._find_sql(start, end)) 4243 else: 4244 pattern = None 4245 4246 define = ( 4247 self._parse_csv(self._parse_name_as_expression) 4248 if self._match_text_seq("DEFINE") 4249 else None 4250 ) 4251 4252 self._match_r_paren() 4253 
return self.expression(
            exp.MatchRecognize(
                partition_by=partition,
                order=order,
                measures=measures,
                rows=rows,
                after=after,
                pattern=pattern,
                define=define,
                alias=self._parse_table_alias(),
            )
        )

    def _parse_lateral(self) -> exp.Lateral | None:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY.

        cross_apply is True for CROSS APPLY, False for OUTER APPLY, and
        None for plain LATERAL; returns None when none of these match.
        """
        cross_apply: bool | None = None
        if self._match_pair(TokenType.CROSS, TokenType.APPLY):
            cross_apply = True
        elif self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        # Not a subquery: LATERAL over an unnest, function call or identifier.
        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

        # Allow dotted references, e.g. LATERAL a.b.fn(...).
        while self._match(TokenType.DOT):
            this = exp.Dot(
                this=this,
                expression=self._parse_function() or self._parse_id_var(any_token=False),
            )

        ordinality: bool | None = None

        if view:
            # LATERAL VIEW form: table name plus optional column aliases.
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: exp.TableAlias | None = self.expression(
                exp.TableAlias(this=table, columns=columns)
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral(
                this=this,
                view=view,
                outer=outer,
                alias=table_alias,
                cross_apply=cross_apply,
                ordinality=ordinality,
            )
        )

    def
_parse_stream(self) -> exp.Stream | None:
        """Parse `STREAM <table>`; rewinds and returns None when the table
        part cannot be parsed."""
        index = self._index
        if self._match(TokenType.STREAM):
            if this := self._try_parse(self._parse_table):
                return self.expression(exp.Stream(this=this))
            self._retreat(index)
        return None

    def _parse_join_parts(
        self,
    ) -> tuple[Token | None, Token | None, Token | None]:
        """Return the (method, side, kind) tokens of a join prefix, each None
        when not present, e.g. HASH / LEFT / OUTER."""
        return (
            self._prev if self._match_set(self.JOIN_METHODS) else None,
            self._prev if self._match_set(self.JOIN_SIDES) else None,
            self._prev if self._match_set(self.JOIN_KINDS) else None,
        )

    def _parse_using_identifiers(self) -> list[exp.Expr]:
        """Parse USING (...) as bare identifiers, unwrapping parsed Columns."""
        def _parse_column_as_identifier() -> exp.Expr | None:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> exp.Join | None:
        """Parse one join clause (comma join, [method] [side] [kind] JOIN,
        or OUTER/CROSS APPLY); returns None when no join starts here."""
        # A bare comma is an implicit join; kind depends on dialect precedence.
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            cross_join = self.expression(exp.Join(this=table)) if table else None

            if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
                cross_join.set("kind", "CROSS")

            return cross_join

        index = self._index
        method, side, kind = self._parse_join_parts()
        directed = self._match_text_seq("DIRECTED")
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
        join_comments = self._prev_comments

        # No JOIN keyword where one was required: undo the prefix matches.
        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not
cross_apply: 4379 return None 4380 4381 kwargs: dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 4382 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 4383 kwargs["expressions"] = self._parse_csv( 4384 lambda: self._parse_table(parse_bracket=parse_bracket) 4385 ) 4386 4387 if method: 4388 kwargs["method"] = method.text.upper() 4389 if side: 4390 kwargs["side"] = side.text.upper() 4391 if kind: 4392 kwargs["kind"] = kind.text.upper() 4393 if hint: 4394 kwargs["hint"] = hint 4395 4396 if self._match(TokenType.MATCH_CONDITION): 4397 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 4398 4399 if self._match(TokenType.ON): 4400 kwargs["on"] = self._parse_disjunction() 4401 elif self._match(TokenType.USING): 4402 kwargs["using"] = self._parse_using_identifiers() 4403 elif ( 4404 not method 4405 and not (outer_apply or cross_apply) 4406 and not isinstance(kwargs["this"], exp.Unnest) 4407 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 4408 ): 4409 index = self._index 4410 joins: list | None = list(self._parse_joins()) 4411 4412 if joins and self._match(TokenType.ON): 4413 kwargs["on"] = self._parse_disjunction() 4414 elif joins and self._match(TokenType.USING): 4415 kwargs["using"] = self._parse_using_identifiers() 4416 else: 4417 joins = None 4418 self._retreat(index) 4419 4420 kwargs["this"].set("joins", joins if joins else None) 4421 4422 kwargs["pivots"] = self._parse_pivots() 4423 4424 comments = [c for token in (method, side, kind) if token for c in token.comments] 4425 comments = (join_comments or []) + comments 4426 4427 if ( 4428 self.ADD_JOIN_ON_TRUE 4429 and not kwargs.get("on") 4430 and not kwargs.get("using") 4431 and not kwargs.get("method") 4432 and kwargs.get("kind") in (None, "INNER", "OUTER") 4433 ): 4434 kwargs["on"] = exp.true() 4435 4436 if directed: 4437 kwargs["directed"] = directed 4438 4439 return self.expression(exp.Join(**kwargs), 
comments=comments) 4440 4441 def _parse_opclass(self) -> exp.Expr | None: 4442 this = self._parse_disjunction() 4443 4444 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 4445 return this 4446 4447 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 4448 return self.expression(exp.Opclass(this=this, expression=self._parse_table_parts())) 4449 4450 return this 4451 4452 def _parse_index_params(self) -> exp.IndexParameters: 4453 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 4454 4455 if self._match(TokenType.L_PAREN, advance=False): 4456 columns = self._parse_wrapped_csv(self._parse_with_operator) 4457 else: 4458 columns = None 4459 4460 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 4461 partition_by = self._parse_partition_by() 4462 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 4463 tablespace = ( 4464 self._parse_var(any_token=True) 4465 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 4466 else None 4467 ) 4468 where = self._parse_where() 4469 4470 on = self._parse_field() if self._match(TokenType.ON) else None 4471 4472 return self.expression( 4473 exp.IndexParameters( 4474 using=using, 4475 columns=columns, 4476 include=include, 4477 partition_by=partition_by, 4478 where=where, 4479 with_storage=with_storage, 4480 tablespace=tablespace, 4481 on=on, 4482 ) 4483 ) 4484 4485 def _parse_index( 4486 self, index: exp.Expr | None = None, anonymous: bool = False 4487 ) -> exp.Index | None: 4488 if index or anonymous: 4489 unique = None 4490 primary = None 4491 amp = None 4492 4493 self._match(TokenType.ON) 4494 self._match(TokenType.TABLE) # hive 4495 table = self._parse_table_parts(schema=True) 4496 else: 4497 unique = self._match(TokenType.UNIQUE) 4498 primary = self._match_text_seq("PRIMARY") 4499 amp = self._match_text_seq("AMP") 4500 4501 if not self._match(TokenType.INDEX): 4502 return None 4503 4504 index = 
self._parse_id_var() 4505 table = None 4506 4507 params = self._parse_index_params() 4508 4509 return self.expression( 4510 exp.Index( 4511 this=index, table=table, unique=unique, primary=primary, amp=amp, params=params 4512 ) 4513 ) 4514 4515 def _parse_table_hints(self) -> list[exp.Expr] | None: 4516 hints: list[exp.Expr] = [] 4517 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 4518 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 4519 hints.append( 4520 self.expression( 4521 exp.WithTableHint( 4522 expressions=self._parse_csv( 4523 lambda: self._parse_function() or self._parse_var(any_token=True) 4524 ) 4525 ) 4526 ) 4527 ) 4528 self._match_r_paren() 4529 else: 4530 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 4531 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 4532 hint = exp.IndexTableHint(this=self._prev.text.upper()) 4533 4534 self._match_set((TokenType.INDEX, TokenType.KEY)) 4535 if self._match(TokenType.FOR): 4536 hint.set("target", self._advance_any() and self._prev.text.upper()) 4537 4538 hint.set("expressions", self._parse_wrapped_id_vars()) 4539 hints.append(hint) 4540 4541 return hints or None 4542 4543 def _parse_table_part(self, schema: bool = False) -> exp.Expr | None: 4544 return ( 4545 (not schema and self._parse_function(optional_parens=False)) 4546 or self._parse_id_var(any_token=False) 4547 or self._parse_string_as_identifier() 4548 or self._parse_placeholder() 4549 ) 4550 4551 def _parse_table_parts_fast(self) -> exp.Table | None: 4552 index = self._index 4553 parts: list[exp.Identifier] | None = None 4554 all_comments: list[str] | None = None 4555 4556 while self._match_set(self.IDENTIFIER_TOKENS): 4557 token = self._prev 4558 comments = self._prev_comments 4559 4560 has_dot = self._match(TokenType.DOT) 4561 curr_tt = self._curr.token_type 4562 4563 if not has_dot: 4564 if curr_tt in self.TABLE_POSTFIX_TOKENS: 4565 self._retreat(index) 4566 return None 4567 elif 
curr_tt not in self.IDENTIFIER_TOKENS: 4568 self._retreat(index) 4569 return None 4570 4571 if parts is None: 4572 parts = [] 4573 4574 if comments: 4575 if all_comments is None: 4576 all_comments = [] 4577 all_comments.extend(comments) 4578 self._prev_comments = [] 4579 4580 parts.append( 4581 self.expression( 4582 exp.Identifier( 4583 this=token.text, quoted=token.token_type == TokenType.IDENTIFIER 4584 ), 4585 token, 4586 ) 4587 ) 4588 4589 if not has_dot: 4590 break 4591 4592 if parts is None: 4593 return None 4594 4595 n = len(parts) 4596 4597 if n == 1: 4598 table: exp.Table = exp.Table(this=parts[0]) 4599 elif n == 2: 4600 table = exp.Table(this=parts[1], db=parts[0]) 4601 elif n >= 3: 4602 this: exp.Identifier | exp.Dot = parts[2] 4603 for i in range(3, n): 4604 this = exp.Dot(this=this, expression=parts[i]) 4605 4606 table = exp.Table(this=this, db=parts[1], catalog=parts[0]) 4607 4608 if table is None: 4609 self._retreat(index) 4610 elif all_comments: 4611 table.add_comments(all_comments) 4612 return table 4613 4614 def _parse_table_parts( 4615 self, 4616 schema: bool = False, 4617 is_db_reference: bool = False, 4618 wildcard: bool = False, 4619 fast: bool = False, 4620 ) -> exp.Table | exp.Dot | None: 4621 if fast: 4622 return self._parse_table_parts_fast() 4623 4624 catalog: exp.Expr | str | None = None 4625 db: exp.Expr | str | None = None 4626 table: exp.Expr | str | None = self._parse_table_part(schema=schema) 4627 4628 while self._match(TokenType.DOT): 4629 if catalog: 4630 # This allows nesting the table in arbitrarily many dot expressions if needed 4631 table = self.expression( 4632 exp.Dot(this=table, expression=self._parse_table_part(schema=schema)) 4633 ) 4634 else: 4635 catalog = db 4636 db = table 4637 # "" used for tsql FROM a..b case 4638 table = self._parse_table_part(schema=schema) or "" 4639 4640 if ( 4641 wildcard 4642 and self._is_connected() 4643 and (isinstance(table, exp.Identifier) or not table) 4644 and self._match(TokenType.STAR) 
4645 ): 4646 if isinstance(table, exp.Identifier): 4647 table.args["this"] += "*" 4648 else: 4649 table = exp.Identifier(this="*") 4650 4651 if is_db_reference: 4652 catalog = db 4653 db = table 4654 table = None 4655 4656 if not table and not is_db_reference: 4657 self.raise_error(f"Expected table name but got {self._curr}") 4658 if not db and is_db_reference: 4659 self.raise_error(f"Expected database name but got {self._curr}") 4660 4661 table = self.expression(exp.Table(this=table, db=db, catalog=catalog)) 4662 4663 # Bubble up comments from identifier parts to the Table 4664 comments = [] 4665 for part in table.parts: 4666 if part_comments := part.pop_comments(): 4667 comments.extend(part_comments) 4668 if comments: 4669 table.add_comments(comments) 4670 4671 changes = self._parse_changes() 4672 if changes: 4673 table.set("changes", changes) 4674 4675 at_before = self._parse_historical_data() 4676 if at_before: 4677 table.set("when", at_before) 4678 4679 pivots = self._parse_pivots() 4680 if pivots: 4681 table.set("pivots", pivots) 4682 4683 return table 4684 4685 def _parse_table( 4686 self, 4687 schema: bool = False, 4688 joins: bool = False, 4689 alias_tokens: t.Collection[TokenType] | None = None, 4690 parse_bracket: bool = False, 4691 is_db_reference: bool = False, 4692 parse_partition: bool = False, 4693 consume_pipe: bool = False, 4694 ) -> exp.Expr | None: 4695 if not schema and not is_db_reference and not consume_pipe and not joins: 4696 index = self._index 4697 table = self._parse_table_parts(fast=True) 4698 4699 if table is not None: 4700 curr_tt = self._curr.token_type 4701 next_tt = self._next.token_type 4702 4703 fast_terminators = self.TABLE_TERMINATORS 4704 4705 # only return the table if we're sure there are no other operators 4706 # MATCH_CONDITION is a special case because it accepts any alias before it like LIMIT 4707 if curr_tt in fast_terminators and next_tt != TokenType.MATCH_CONDITION: 4708 return table 4709 4710 postfix_tokens = 
self.TABLE_POSTFIX_TOKENS 4711 4712 if curr_tt not in postfix_tokens and next_tt not in postfix_tokens: 4713 if alias := self._parse_table_alias( 4714 alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 4715 ): 4716 table.set("alias", alias) 4717 4718 if self._curr.token_type in fast_terminators: 4719 return table 4720 4721 self._retreat(index) 4722 4723 if stream := self._parse_stream(): 4724 return stream 4725 4726 if lateral := self._parse_lateral(): 4727 return lateral 4728 4729 if unnest := self._parse_unnest(): 4730 return unnest 4731 4732 if values := self._parse_derived_table_values(): 4733 return values 4734 4735 if subquery := self._parse_select(table=True, consume_pipe=consume_pipe): 4736 if not subquery.args.get("pivots"): 4737 subquery.set("pivots", self._parse_pivots()) 4738 return subquery 4739 4740 bracket = parse_bracket and self._parse_bracket(None) 4741 bracket = self.expression(exp.Table(this=bracket)) if bracket else None 4742 4743 rows_from_tables = ( 4744 self._parse_wrapped_csv(self._parse_table) 4745 if self._match_text_seq("ROWS", "FROM") 4746 else None 4747 ) 4748 rows_from = ( 4749 self.expression(exp.Table(rows_from=rows_from_tables)) if rows_from_tables else None 4750 ) 4751 4752 only = self._match(TokenType.ONLY) 4753 4754 this = t.cast( 4755 exp.Expr, 4756 bracket 4757 or rows_from 4758 or self._parse_bracket( 4759 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4760 ), 4761 ) 4762 4763 if only: 4764 this.set("only", only) 4765 4766 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4767 self._match(TokenType.STAR) 4768 4769 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4770 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4771 this.set("partition", self._parse_partition()) 4772 4773 if schema: 4774 return self._parse_schema(this=this) 4775 4776 if self.dialect.ALIAS_POST_VERSION: 4777 this.set("version", 
self._parse_version()) 4778 4779 if self.dialect.ALIAS_POST_TABLESAMPLE: 4780 this.set("sample", self._parse_table_sample()) 4781 4782 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4783 if alias: 4784 this.set("alias", alias) 4785 4786 if self._match(TokenType.INDEXED_BY): 4787 this.set("indexed", self._parse_table_parts()) 4788 elif self._match_text_seq("NOT", "INDEXED"): 4789 this.set("indexed", False) 4790 4791 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4792 return self.expression( 4793 exp.AtIndex(this=this.to_column(copy=False), expression=self._parse_id_var()) 4794 ) 4795 4796 this.set("hints", self._parse_table_hints()) 4797 4798 if not this.args.get("pivots"): 4799 this.set("pivots", self._parse_pivots()) 4800 4801 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4802 this.set("sample", self._parse_table_sample()) 4803 4804 if not self.dialect.ALIAS_POST_VERSION: 4805 this.set("version", self._parse_version()) 4806 4807 if joins: 4808 for join in self._parse_joins(): 4809 this.append("joins", join) 4810 4811 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4812 this.set("ordinality", True) 4813 this.set("alias", self._parse_table_alias()) 4814 4815 return this 4816 4817 def _parse_version(self) -> exp.Version | None: 4818 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4819 this = "TIMESTAMP" 4820 elif self._match(TokenType.VERSION_SNAPSHOT): 4821 this = "VERSION" 4822 else: 4823 return None 4824 4825 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4826 kind = self._prev.text.upper() 4827 start = self._parse_bitwise() 4828 self._match_texts(("TO", "AND")) 4829 end = self._parse_bitwise() 4830 expression: exp.Expr | None = self.expression(exp.Tuple(expressions=[start, end])) 4831 elif self._match_text_seq("CONTAINED", "IN"): 4832 kind = "CONTAINED IN" 4833 expression = self.expression( 4834 exp.Tuple(expressions=self._parse_wrapped_csv(self._parse_bitwise)) 4835 ) 4836 elif 
self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            # Default / explicit AS OF form.
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version(this=this, expression=expression, kind=kind))

    def _parse_historical_data(self) -> exp.HistoricalData | None:
        """Parse a Snowflake-style AT/BEFORE historical-data clause;
        rewinds and returns None when the `kind => expr` body is absent."""
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData(this=this, kind=kind, expression=expression)
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> exp.Changes | None:
        """Parse `CHANGES (INFORMATION => <var>)` with optional AT/BEFORE
        start and end bounds (Snowflake change tracking)."""
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes(
                information=information,
                at_before=self._parse_historical_data(),
                end=self._parse_historical_data(),
            )
        )

    def _parse_unnest(self, with_alias: bool = True) -> exp.Unnest | None:
        """Parse `UNNEST(...)` with optional alias, WITH ORDINALITY and
        WITH OFFSET handling; None unless `UNNEST(` starts here."""
        if not self._match_pair(TokenType.UNNEST, TokenType.L_PAREN, advance=False):
            return None

        self._advance()

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset: bool | exp.Expr = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column 
alias in unnest.") 4899 4900 alias.set("columns", [alias.this]) 4901 alias.set("this", None) 4902 4903 columns = alias.args.get("columns") or [] 4904 if offset and len(expressions) < len(columns): 4905 offset = columns.pop() 4906 4907 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4908 self._match(TokenType.ALIAS) 4909 offset = self._parse_id_var( 4910 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4911 ) or exp.to_identifier("offset") 4912 4913 return self.expression(exp.Unnest(expressions=expressions, alias=alias, offset=offset)) 4914 4915 def _parse_derived_table_values(self) -> exp.Values | None: 4916 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4917 if not is_derived and not ( 4918 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4919 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4920 ): 4921 return None 4922 4923 expressions = self._parse_csv(self._parse_value) 4924 alias = self._parse_table_alias() 4925 4926 if is_derived: 4927 self._match_r_paren() 4928 4929 return self.expression( 4930 exp.Values(expressions=expressions, alias=alias or self._parse_table_alias()) 4931 ) 4932 4933 def _parse_table_sample(self, as_modifier: bool = False) -> exp.TableSample | None: 4934 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4935 as_modifier and self._match_text_seq("USING", "SAMPLE") 4936 ): 4937 return None 4938 4939 bucket_numerator = None 4940 bucket_denominator = None 4941 bucket_field = None 4942 percent = None 4943 size = None 4944 seed = None 4945 4946 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4947 matched_l_paren = self._match(TokenType.L_PAREN) 4948 4949 if self.TABLESAMPLE_CSV: 4950 num = None 4951 expressions = self._parse_csv(self._parse_primary) 4952 else: 4953 expressions = None 4954 num = ( 4955 self._parse_factor() 4956 if self._match(TokenType.NUMBER, advance=False) 4957 else self._parse_primary() or self._parse_placeholder() 4958 ) 
4959 4960 if self._match_text_seq("BUCKET"): 4961 bucket_numerator = self._parse_number() 4962 self._match_text_seq("OUT", "OF") 4963 bucket_denominator = bucket_denominator = self._parse_number() 4964 self._match(TokenType.ON) 4965 bucket_field = self._parse_field() 4966 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4967 percent = num 4968 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4969 size = num 4970 else: 4971 percent = num 4972 4973 if matched_l_paren: 4974 self._match_r_paren() 4975 4976 if self._match(TokenType.L_PAREN): 4977 method = self._parse_var(upper=True) 4978 seed = self._match(TokenType.COMMA) and self._parse_number() 4979 self._match_r_paren() 4980 elif self._match_texts(("SEED", "REPEATABLE")): 4981 seed = self._parse_wrapped(self._parse_number) 4982 4983 if not method and self.DEFAULT_SAMPLING_METHOD: 4984 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4985 4986 return self.expression( 4987 exp.TableSample( 4988 expressions=expressions, 4989 method=method, 4990 bucket_numerator=bucket_numerator, 4991 bucket_denominator=bucket_denominator, 4992 bucket_field=bucket_field, 4993 percent=percent, 4994 size=size, 4995 seed=seed, 4996 ) 4997 ) 4998 4999 def _parse_pivots(self) -> list[exp.Pivot] | None: 5000 return list(iter(self._parse_pivot, None)) or None 5001 5002 def _parse_joins(self) -> t.Iterator[exp.Join]: 5003 return iter(self._parse_join, None) 5004 5005 def _parse_unpivot_columns(self) -> exp.UnpivotColumns | None: 5006 if not self._match(TokenType.INTO): 5007 return None 5008 5009 return self.expression( 5010 exp.UnpivotColumns( 5011 this=self._match_text_seq("NAME") and self._parse_column(), 5012 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 5013 ) 5014 ) 5015 5016 # https://duckdb.org/docs/sql/statements/pivot 5017 def _parse_simplified_pivot(self, is_unpivot: bool | None = None) -> exp.Pivot: 5018 def _parse_on() -> exp.Expr | None: 5019 this = 
self._parse_bitwise() 5020 5021 if self._match(TokenType.IN): 5022 # PIVOT ... ON col IN (row_val1, row_val2) 5023 return self._parse_in(this) 5024 if self._match(TokenType.ALIAS, advance=False): 5025 # UNPIVOT ... ON (col1, col2, col3) AS row_val 5026 return self._parse_alias(this) 5027 5028 return this 5029 5030 this = self._parse_table() 5031 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 5032 into = self._parse_unpivot_columns() 5033 using = self._match(TokenType.USING) and self._parse_csv( 5034 lambda: self._parse_alias(self._parse_column()) 5035 ) 5036 group = self._parse_group() 5037 5038 return self.expression( 5039 exp.Pivot( 5040 this=this, 5041 expressions=expressions, 5042 using=using, 5043 group=group, 5044 unpivot=is_unpivot, 5045 into=into, 5046 ) 5047 ) 5048 5049 def _parse_pivot_in(self) -> exp.In: 5050 def _parse_aliased_expression() -> exp.Expr | None: 5051 this = self._parse_select_or_expression() 5052 5053 self._match(TokenType.ALIAS) 5054 alias = self._parse_bitwise() 5055 if alias: 5056 if isinstance(alias, exp.Column) and not alias.db: 5057 alias = alias.this 5058 return self.expression(exp.PivotAlias(this=this, alias=alias)) 5059 5060 return this 5061 5062 value = self._parse_column() 5063 5064 if not self._match(TokenType.IN): 5065 self.raise_error("Expecting IN") 5066 5067 if self._match(TokenType.L_PAREN): 5068 if self._match(TokenType.ANY): 5069 exprs: list[exp.Expr] = ensure_list(exp.PivotAny(this=self._parse_order())) 5070 else: 5071 exprs = self._parse_csv(_parse_aliased_expression) 5072 self._match_r_paren() 5073 return self.expression(exp.In(this=value, expressions=exprs)) 5074 5075 return self.expression(exp.In(this=value, field=self._parse_id_var())) 5076 5077 def _parse_pivot_aggregation(self) -> exp.Expr | None: 5078 func = self._parse_function() 5079 if not func: 5080 if self._prev.token_type == TokenType.COMMA: 5081 return None 5082 self.raise_error("Expecting an aggregation function in PIVOT") 5083 
    def _parse_pivot(self) -> exp.Pivot | None:
        """Parse a PIVOT/UNPIVOT clause, returning None (with the cursor restored)
        when the upcoming tokens don't actually form one.
        """
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        # PIVOT/UNPIVOT not followed by "(" is not a pivot clause -- backtrack
        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        # UNPIVOT takes plain columns; PIVOT takes aggregation calls
        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(self._parse_pivot_aggregation)

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        # One or more `<col> IN (...)` field specs may follow FOR
        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot(
                expressions=expressions,
                fields=fields,
                unpivot=unpivot,
                include_nulls=include_nulls,
                default_on_null=default_on_null,
                group=group,
            )
        )

        # Only consume a trailing alias when no further PIVOT/UNPIVOT follows
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names implied by the pivot
            names = self._pivot_column_names(t.cast(list[exp.Expr], expressions))

            columns: list[exp.Expr] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot
self._prev_comments 5215 5216 elements: dict[str, t.Any] = defaultdict(list) 5217 5218 if self._match(TokenType.ALL): 5219 elements["all"] = True 5220 elif self._match(TokenType.DISTINCT): 5221 elements["all"] = False 5222 5223 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 5224 return self.expression(exp.Group(**elements), comments=comments) # type: ignore 5225 5226 while True: 5227 index = self._index 5228 5229 elements["expressions"].extend( 5230 self._parse_csv( 5231 lambda: ( 5232 None 5233 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 5234 else self._parse_disjunction() 5235 ) 5236 ) 5237 ) 5238 5239 before_with_index = self._index 5240 with_prefix = self._match(TokenType.WITH) 5241 5242 if cube_or_rollup := self._parse_cube_or_rollup(with_prefix=with_prefix): 5243 key = "rollup" if isinstance(cube_or_rollup, exp.Rollup) else "cube" 5244 elements[key].append(cube_or_rollup) 5245 elif grouping_sets := self._parse_grouping_sets(): 5246 elements["grouping_sets"].append(grouping_sets) 5247 elif self._match_text_seq("TOTALS"): 5248 elements["totals"] = True # type: ignore 5249 5250 if before_with_index <= self._index <= before_with_index + 1: 5251 self._retreat(before_with_index) 5252 break 5253 5254 if index == self._index: 5255 break 5256 5257 return self.expression(exp.Group(**elements), comments=comments) # type: ignore 5258 5259 def _parse_cube_or_rollup(self, with_prefix: bool = False) -> exp.Cube | exp.Rollup | None: 5260 if self._match(TokenType.CUBE): 5261 kind: type[exp.Cube | exp.Rollup] = exp.Cube 5262 elif self._match(TokenType.ROLLUP): 5263 kind = exp.Rollup 5264 else: 5265 return None 5266 5267 return self.expression( 5268 kind(expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_bitwise)) 5269 ) 5270 5271 def _parse_grouping_sets(self) -> exp.GroupingSets | None: 5272 if self._match(TokenType.GROUPING_SETS): 5273 return self.expression( 5274 
exp.GroupingSets(expressions=self._parse_wrapped_csv(self._parse_grouping_set)) 5275 ) 5276 return None 5277 5278 def _parse_grouping_set(self) -> exp.Expr | None: 5279 return self._parse_grouping_sets() or self._parse_cube_or_rollup() or self._parse_bitwise() 5280 5281 def _parse_having(self, skip_having_token: bool = False) -> exp.Having | None: 5282 if not skip_having_token and not self._match(TokenType.HAVING): 5283 return None 5284 comments = self._prev_comments 5285 return self.expression( 5286 exp.Having(this=self._parse_disjunction()), 5287 comments=comments, 5288 ) 5289 5290 def _parse_qualify(self) -> exp.Qualify | None: 5291 if not self._match(TokenType.QUALIFY): 5292 return None 5293 return self.expression(exp.Qualify(this=self._parse_disjunction())) 5294 5295 def _parse_connect_with_prior(self) -> exp.Expr | None: 5296 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 5297 exp.Prior(this=self._parse_bitwise()) 5298 ) 5299 connect = self._parse_disjunction() 5300 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 5301 return connect 5302 5303 def _parse_connect(self, skip_start_token: bool = False) -> exp.Connect | None: 5304 if skip_start_token: 5305 start = None 5306 elif self._match(TokenType.START_WITH): 5307 start = self._parse_disjunction() 5308 else: 5309 return None 5310 5311 self._match(TokenType.CONNECT_BY) 5312 nocycle = self._match_text_seq("NOCYCLE") 5313 connect = self._parse_connect_with_prior() 5314 5315 if not start and self._match(TokenType.START_WITH): 5316 start = self._parse_disjunction() 5317 5318 return self.expression(exp.Connect(start=start, connect=connect, nocycle=nocycle)) 5319 5320 def _parse_name_as_expression(self) -> exp.Expr | None: 5321 this = self._parse_id_var(any_token=True) 5322 if self._match(TokenType.ALIAS): 5323 this = self.expression(exp.Alias(alias=this, this=self._parse_disjunction())) 5324 return this 5325 5326 def _parse_interpolate(self) -> list[exp.Expr] | None: 5327 if 
self._match_text_seq("INTERPOLATE"): 5328 return self._parse_wrapped_csv(self._parse_name_as_expression) 5329 return None 5330 5331 def _parse_order( 5332 self, this: exp.Expr | None = None, skip_order_token: bool = False 5333 ) -> exp.Expr | None: 5334 siblings = None 5335 if not skip_order_token and not self._match(TokenType.ORDER_BY): 5336 if not self._match(TokenType.ORDER_SIBLINGS_BY): 5337 return this 5338 5339 siblings = True 5340 5341 comments = self._prev_comments 5342 return self.expression( 5343 exp.Order( 5344 this=this, 5345 expressions=self._parse_csv(self._parse_ordered), 5346 siblings=siblings, 5347 ), 5348 comments=comments, 5349 ) 5350 5351 def _parse_sort(self, exp_class: type[E], token: TokenType) -> E | None: 5352 if not self._match(token): 5353 return None 5354 return self.expression(exp_class(expressions=self._parse_csv(self._parse_ordered))) 5355 5356 def _parse_ordered( 5357 self, parse_method: t.Callable[[], exp.Expr | None] | None = None 5358 ) -> exp.Ordered | None: 5359 this = parse_method() if parse_method else self._parse_disjunction() 5360 if not this: 5361 return None 5362 5363 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 5364 this = exp.var("ALL") 5365 5366 asc = self._match(TokenType.ASC) 5367 desc: bool | None = True if self._match(TokenType.DESC) else (False if asc else None) 5368 5369 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 5370 is_nulls_last = self._match_text_seq("NULLS", "LAST") 5371 5372 nulls_first = is_nulls_first or False 5373 explicitly_null_ordered = is_nulls_first or is_nulls_last 5374 5375 if ( 5376 not explicitly_null_ordered 5377 and ( 5378 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 5379 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 5380 ) 5381 and self.dialect.NULL_ORDERING != "nulls_are_last" 5382 ): 5383 nulls_first = True 5384 5385 if self._match_text_seq("WITH", "FILL"): 5386 with_fill = self.expression( 5387 exp.WithFill( 5388 
from_=self._match(TokenType.FROM) and self._parse_bitwise(), 5389 to=self._match_text_seq("TO") and self._parse_bitwise(), 5390 step=self._match_text_seq("STEP") and self._parse_bitwise(), 5391 interpolate=self._parse_interpolate(), 5392 ) 5393 ) 5394 else: 5395 with_fill = None 5396 5397 return self.expression( 5398 exp.Ordered(this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill) 5399 ) 5400 5401 def _parse_limit_options(self) -> exp.LimitOptions | None: 5402 percent = self._match_set((TokenType.PERCENT, TokenType.MOD)) 5403 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 5404 self._match_text_seq("ONLY") 5405 with_ties = self._match_text_seq("WITH", "TIES") 5406 5407 if not (percent or rows or with_ties): 5408 return None 5409 5410 return self.expression(exp.LimitOptions(percent=percent, rows=rows, with_ties=with_ties)) 5411 5412 def _parse_limit( 5413 self, 5414 this: exp.Expr | None = None, 5415 top: bool = False, 5416 skip_limit_token: bool = False, 5417 ) -> exp.Expr | None: 5418 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 5419 comments = self._prev_comments 5420 if top: 5421 limit_paren = self._match(TokenType.L_PAREN) 5422 expression = ( 5423 self._parse_term() or self._parse_select() 5424 if limit_paren 5425 else self._parse_number() 5426 ) 5427 5428 if limit_paren: 5429 self._match_r_paren() 5430 5431 else: 5432 # Parsing LIMIT x% (i.e x PERCENT) as a term leads to an error, since 5433 # we try to build an exp.Mod expr. 
For that matter, we backtrack and instead 5434 # consume the factor plus parse the percentage separately 5435 index = self._index 5436 expression = self._try_parse(self._parse_term) 5437 if isinstance(expression, exp.Mod): 5438 self._retreat(index) 5439 expression = self._parse_factor() 5440 elif not expression: 5441 expression = self._parse_factor() 5442 limit_options = self._parse_limit_options() 5443 5444 if self._match(TokenType.COMMA): 5445 offset = expression 5446 expression = self._parse_term() 5447 else: 5448 offset = None 5449 5450 limit_exp = self.expression( 5451 exp.Limit( 5452 this=this, 5453 expression=expression, 5454 offset=offset, 5455 limit_options=limit_options, 5456 expressions=self._parse_limit_by(), 5457 ), 5458 comments=comments, 5459 ) 5460 5461 return limit_exp 5462 5463 if self._match(TokenType.FETCH): 5464 direction = ( 5465 self._prev.text.upper() 5466 if self._match_set((TokenType.FIRST, TokenType.NEXT)) 5467 else "FIRST" 5468 ) 5469 5470 count = self._parse_field(tokens=self.FETCH_TOKENS) 5471 5472 return self.expression( 5473 exp.Fetch( 5474 direction=direction, count=count, limit_options=self._parse_limit_options() 5475 ) 5476 ) 5477 5478 return this 5479 5480 def _parse_offset(self, this: exp.Expr | None = None) -> exp.Expr | None: 5481 if not self._match(TokenType.OFFSET): 5482 return this 5483 5484 count = self._parse_term() 5485 self._match_set((TokenType.ROW, TokenType.ROWS)) 5486 5487 return self.expression( 5488 exp.Offset(this=this, expression=count, expressions=self._parse_limit_by()) 5489 ) 5490 5491 def _can_parse_limit_or_offset(self) -> bool: 5492 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 5493 return False 5494 5495 index = self._index 5496 result = bool( 5497 self._try_parse(self._parse_limit, retreat=True) 5498 or self._try_parse(self._parse_offset, retreat=True) 5499 ) 5500 self._retreat(index) 5501 5502 # MATCH_CONDITION (...) 
is a special construct that should not be consumed by limit/offset 5503 if self._next.token_type == TokenType.MATCH_CONDITION: 5504 result = False 5505 5506 return result 5507 5508 def _parse_limit_by(self) -> list[exp.Expr] | None: 5509 return self._parse_csv(self._parse_bitwise) if self._match_text_seq("BY") else None 5510 5511 def _parse_locks(self) -> list[exp.Lock]: 5512 locks = [] 5513 while True: 5514 update, key = None, None 5515 if self._match_text_seq("FOR", "UPDATE"): 5516 update = True 5517 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 5518 "LOCK", "IN", "SHARE", "MODE" 5519 ): 5520 update = False 5521 elif self._match_text_seq("FOR", "KEY", "SHARE"): 5522 update, key = False, True 5523 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 5524 update, key = True, True 5525 else: 5526 break 5527 5528 expressions = None 5529 if self._match_text_seq("OF"): 5530 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 5531 5532 wait: bool | exp.Expr | None = None 5533 if self._match_text_seq("NOWAIT"): 5534 wait = True 5535 elif self._match_text_seq("WAIT"): 5536 wait = self._parse_primary() 5537 elif self._match_text_seq("SKIP", "LOCKED"): 5538 wait = False 5539 5540 locks.append( 5541 self.expression( 5542 exp.Lock(update=update, expressions=expressions, wait=wait, key=key) 5543 ) 5544 ) 5545 5546 return locks 5547 5548 def parse_set_operation( 5549 self, this: exp.Expr | None, consume_pipe: bool = False 5550 ) -> exp.Expr | None: 5551 start = self._index 5552 _, side_token, kind_token = self._parse_join_parts() 5553 5554 side = side_token.text if side_token else None 5555 kind = kind_token.text if kind_token else None 5556 5557 if not self._match_set(self.SET_OPERATIONS): 5558 self._retreat(start) 5559 return None 5560 5561 token_type = self._prev.token_type 5562 5563 if token_type == TokenType.UNION: 5564 operation: type[exp.SetOperation] = exp.Union 5565 elif token_type == TokenType.EXCEPT: 5566 operation = 
exp.Except 5567 else: 5568 operation = exp.Intersect 5569 5570 comments = self._prev.comments 5571 5572 if self._match(TokenType.DISTINCT): 5573 distinct: bool | None = True 5574 elif self._match(TokenType.ALL): 5575 distinct = False 5576 else: 5577 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 5578 if distinct is None: 5579 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 5580 5581 by_name = ( 5582 self._match_text_seq("BY", "NAME") 5583 or self._match_text_seq("STRICT", "CORRESPONDING") 5584 or None 5585 ) 5586 if self._match_text_seq("CORRESPONDING"): 5587 by_name = True 5588 if not side and not kind: 5589 kind = "INNER" 5590 5591 on_column_list = None 5592 if by_name and self._match_texts(("ON", "BY")): 5593 on_column_list = self._parse_wrapped_csv(self._parse_column) 5594 5595 expression = self._parse_select( 5596 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 5597 ) 5598 5599 return self.expression( 5600 operation( 5601 this=this, 5602 distinct=distinct, 5603 by_name=by_name, 5604 expression=expression, 5605 side=side, 5606 kind=kind, 5607 on=on_column_list, 5608 ), 5609 comments=comments, 5610 ) 5611 5612 def _parse_set_operations(self, this: exp.Expr | None) -> exp.Expr | None: 5613 while this: 5614 setop = self.parse_set_operation(this) 5615 if not setop: 5616 break 5617 this = setop 5618 5619 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 5620 expression = this.expression 5621 5622 if expression: 5623 for arg in self.SET_OP_MODIFIERS: 5624 expr = expression.args.get(arg) 5625 if expr: 5626 this.set(arg, expr.pop()) 5627 5628 return this 5629 5630 def _parse_expression(self) -> exp.Expr | None: 5631 return self._parse_alias(self._parse_assignment()) 5632 5633 def _parse_assignment(self) -> exp.Expr | None: 5634 this = self._parse_disjunction() 5635 if not this and self._next.token_type in self.ASSIGNMENT: 5636 # This allows us to parse <non-identifier token> := <expr> 
5637 this = exp.column( 5638 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 5639 ) 5640 5641 while self._match_set(self.ASSIGNMENT): 5642 if isinstance(this, exp.Column) and len(this.parts) == 1: 5643 this = this.this 5644 5645 comments = self._prev_comments 5646 this = self.expression( 5647 self.ASSIGNMENT[self._prev.token_type]( 5648 this=this, expression=self._parse_assignment() 5649 ), 5650 comments=comments, 5651 ) 5652 5653 return this 5654 5655 def _parse_disjunction(self) -> exp.Expr | None: 5656 this = self._parse_conjunction() 5657 while self._match_set(self.DISJUNCTION): 5658 comments = self._prev_comments 5659 this = self.expression( 5660 self.DISJUNCTION[self._prev.token_type]( 5661 this=this, expression=self._parse_conjunction() 5662 ), 5663 comments=comments, 5664 ) 5665 return this 5666 5667 def _parse_conjunction(self) -> exp.Expr | None: 5668 this = self._parse_equality() 5669 while self._match_set(self.CONJUNCTION): 5670 comments = self._prev_comments 5671 this = self.expression( 5672 self.CONJUNCTION[self._prev.token_type]( 5673 this=this, expression=self._parse_equality() 5674 ), 5675 comments=comments, 5676 ) 5677 return this 5678 5679 def _parse_equality(self) -> exp.Expr | None: 5680 this = self._parse_comparison() 5681 while self._match_set(self.EQUALITY): 5682 comments = self._prev_comments 5683 this = self.expression( 5684 self.EQUALITY[self._prev.token_type]( 5685 this=this, expression=self._parse_comparison() 5686 ), 5687 comments=comments, 5688 ) 5689 return this 5690 5691 def _parse_comparison(self) -> exp.Expr | None: 5692 this = self._parse_range() 5693 while self._match_set(self.COMPARISON): 5694 comments = self._prev_comments 5695 this = self.expression( 5696 self.COMPARISON[self._prev.token_type](this=this, expression=self._parse_range()), 5697 comments=comments, 5698 ) 5699 return this 5700 5701 def _parse_range(self, this: exp.Expr | None = None) -> exp.Expr | None: 5702 this = this or 
self._parse_bitwise() 5703 negate = self._match(TokenType.NOT) 5704 5705 if self._match_set(self.RANGE_PARSERS): 5706 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 5707 if not expression: 5708 return this 5709 5710 this = expression 5711 elif self._match(TokenType.ISNULL) or (negate and self._match(TokenType.NULL)): 5712 this = self.expression(exp.Is(this=this, expression=exp.Null())) 5713 5714 # Postgres supports ISNULL and NOTNULL for conditions. 5715 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 5716 if self._match(TokenType.NOTNULL): 5717 this = self.expression(exp.Is(this=this, expression=exp.Null())) 5718 this = self.expression(exp.Not(this=this)) 5719 5720 if negate: 5721 this = self._negate_range(this) 5722 5723 if self._match(TokenType.IS): 5724 this = self._parse_is(this) 5725 5726 return this 5727 5728 def _negate_range(self, this: exp.Expr | None = None) -> exp.Expr | None: 5729 if not this: 5730 return this 5731 5732 return self.expression(exp.Not(this=this)) 5733 5734 def _parse_is(self, this: exp.Expr | None) -> exp.Expr | None: 5735 index = self._index - 1 5736 negate = self._match(TokenType.NOT) 5737 5738 if self._match_text_seq("DISTINCT", "FROM"): 5739 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5740 return self.expression(klass(this=this, expression=self._parse_bitwise())) 5741 5742 if self._match(TokenType.JSON): 5743 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5744 5745 if self._match_text_seq("WITH"): 5746 _with = True 5747 elif self._match_text_seq("WITHOUT"): 5748 _with = False 5749 else: 5750 _with = None 5751 5752 unique = self._match(TokenType.UNIQUE) 5753 self._match_text_seq("KEYS") 5754 expression: exp.Expr | None = self.expression( 5755 exp.JSON(this=kind, with_=_with, unique=unique) 5756 ) 5757 else: 5758 expression = self._parse_null() or self._parse_bitwise() 5759 if not expression: 5760 self._retreat(index) 5761 return None 5762 5763 this = 
self.expression(exp.Is(this=this, expression=expression)) 5764 this = self.expression(exp.Not(this=this)) if negate else this 5765 return self._parse_column_ops(this) 5766 5767 def _parse_in(self, this: exp.Expr | None, alias: bool = False) -> exp.In: 5768 unnest = self._parse_unnest(with_alias=False) 5769 if unnest: 5770 this = self.expression(exp.In(this=this, unnest=unnest)) 5771 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5772 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5773 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5774 5775 if len(expressions) == 1 and isinstance(query := expressions[0], exp.Query): 5776 this = self.expression( 5777 exp.In(this=this, query=self._parse_query_modifiers(query).subquery(copy=False)) 5778 ) 5779 else: 5780 this = self.expression(exp.In(this=this, expressions=expressions)) 5781 5782 if matched_l_paren: 5783 self._match_r_paren(this) 5784 elif not self._match(TokenType.R_BRACKET, expression=this): 5785 self.raise_error("Expecting ]") 5786 else: 5787 this = self.expression(exp.In(this=this, field=self._parse_column())) 5788 5789 return this 5790 5791 def _parse_between(self, this: exp.Expr | None) -> exp.Between: 5792 symmetric = None 5793 if self._match_text_seq("SYMMETRIC"): 5794 symmetric = True 5795 elif self._match_text_seq("ASYMMETRIC"): 5796 symmetric = False 5797 5798 low = self._parse_bitwise() 5799 self._match(TokenType.AND) 5800 high = self._parse_bitwise() 5801 5802 return self.expression(exp.Between(this=this, low=low, high=high, symmetric=symmetric)) 5803 5804 def _parse_escape(self, this: exp.Expr | None) -> exp.Expr | None: 5805 if not self._match(TokenType.ESCAPE): 5806 return this 5807 return self.expression( 5808 exp.Escape(this=this, expression=self._parse_string() or self._parse_null()) 5809 ) 5810 5811 def _parse_interval_span(self, this: exp.Expr) -> exp.Interval: 5812 # handle day-time format interval span with omitted units: 5813 
    def _parse_interval(self, require_interval: bool = True) -> exp.Add | exp.Interval | None:
        """Parse an INTERVAL expression (the keyword may be omitted when
        require_interval=False), restoring the cursor and returning None on failure.
        """
        index = self._index

        if not self._match(TokenType.INTERVAL) and require_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        # Bail out when what follows can't be an interval, e.g. a bare unquoted
        # column whose next token isn't a valid interval unit for this dialect
        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and self._curr
            and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS
        ):
            self._retreat(index)
            return None

        interval = self._parse_interval_span(this)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(exp.Add(this=interval, expression=self._parse_interval(False)))

        self._retreat(index)
        return interval
def _parse_factor(self) -> exp.Expr | None:
    """Parse multiplication-level binary operators (the dialect's FACTOR set)."""
    operand_parser = self._parse_exponent if self.EXPONENT else self._parse_unary
    node = self._parse_at_time_zone(operand_parser())

    while self._match_set(self.FACTOR):
        operator = self.FACTOR[self._prev.token_type]
        op_comments = self._prev_comments
        rhs = operand_parser()

        # A word operator (e.g. DIV) with no right-hand side was likely an
        # identifier rather than an operator: unconsume it and stop here
        if not rhs and operator is exp.IntDiv and self._prev.text.isalpha():
            self._retreat(self._index - 1)
            return node

        node = self.expression(operator(this=node, expression=rhs), comments=op_comments)

        if isinstance(node, exp.Div):
            node.set("typed", self.dialect.TYPED_DIVISION)
            node.set("safe", self.dialect.SAFE_DIVISION)

    return node
def _parse_type(
    self, parse_interval: bool = True, fallback_to_identifier: bool = False
) -> exp.Expr | None:
    """Parse a "typed" operand: a simple atom, an INTERVAL expression, a data type
    (possibly an inline constructor that canonicalizes to a CAST), or fall back to
    a column / identifier.

    Args:
        parse_interval: Whether INTERVAL expressions may be parsed here.
        fallback_to_identifier: When no type can be parsed, produce an identifier
            instead of a column.
    """
    # Fast path: a simple atom (literal or plain column) short-circuits everything
    if not fallback_to_identifier and (atom := self._parse_atom()) is not None:
        return atom

    # NOTE: the walrus binds the whole `parse_interval and ...` expression, so
    # `interval` is falsy when interval parsing is disabled
    if interval := parse_interval and self._parse_interval():
        return self._parse_column_ops(interval)

    index = self._index
    data_type = self._parse_types(check_func=True, allow_identifiers=False)

    # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
    # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
    if isinstance(data_type, exp.Cast):
        # This constructor can contain ops directly after it, for instance struct unnesting:
        # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
        return self._parse_column_ops(data_type)

    if data_type:
        index2 = self._index
        this = self._parse_primary()

        if isinstance(this, exp.Literal):
            # Keep the raw literal text around; used below for the timezone check
            literal = this.name
            this = self._parse_column_ops(this)

            parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
            if parser:
                return parser(self, this, data_type)

            # A TIMESTAMP literal that embeds a timezone is promoted to TIMESTAMPTZ
            if (
                self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
                and data_type.is_type(exp.DType.TIMESTAMP)
                and TIME_ZONE_RE.search(literal)
            ):
                data_type = exp.DType.TIMESTAMPTZ.into_expr()

            return self.expression(exp.Cast(this=this, to=data_type))

        # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
        # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
        #
        # If the index difference here is greater than 1, that means the parser itself must have
        # consumed additional tokens such as the DECIMAL scale and precision in the above example.
        #
        # If it's not greater than 1, then it must be 1, because we've consumed at least the type
        # keyword, meaning that the expressions arg of the DataType must have gotten set by a
        # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
        # DECIMAL(38, 0)) in order to facilitate the data type's transpilation.
        #
        # In these cases, we don't really want to return the converted type, but instead retreat
        # and try to parse a Column or Identifier in the section below.
        if data_type.expressions and index2 - index > 1:
            self._retreat(index2)
            return self._parse_column_ops(data_type)

        self._retreat(index)

    if fallback_to_identifier:
        return self._parse_id_var()

    return self._parse_column()
6040 # 6041 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 6042 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 6043 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 6044 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 6045 # 6046 # In these cases, we don't really want to return the converted type, but instead retreat 6047 # and try to parse a Column or Identifier in the section below. 6048 if data_type.expressions and index2 - index > 1: 6049 self._retreat(index2) 6050 return self._parse_column_ops(data_type) 6051 6052 self._retreat(index) 6053 6054 if fallback_to_identifier: 6055 return self._parse_id_var() 6056 6057 return self._parse_column() 6058 6059 def _parse_type_size(self) -> exp.DataTypeParam | None: 6060 this = self._parse_type() 6061 if not this: 6062 return None 6063 6064 if isinstance(this, exp.Column) and not this.table: 6065 this = exp.var(this.name.upper()) 6066 6067 return self.expression( 6068 exp.DataTypeParam(this=this, expression=self._parse_var(any_token=True)) 6069 ) 6070 6071 def _parse_user_defined_type(self, identifier: exp.Identifier) -> exp.Expr | None: 6072 type_name = identifier.name 6073 6074 while self._match(TokenType.DOT): 6075 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 6076 6077 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 6078 6079 def _parse_types( 6080 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 6081 ) -> exp.Expr | None: 6082 index = self._index 6083 this: exp.Expr | None = None 6084 6085 if self._match_set(self.TYPE_TOKENS): 6086 type_token = self._prev.token_type 6087 else: 6088 type_token = None 6089 identifier = allow_identifiers and self._parse_id_var( 6090 any_token=False, tokens=(TokenType.VAR,) 6091 ) 6092 if isinstance(identifier, exp.Identifier): 6093 try: 6094 tokens = 
def _parse_types(
    self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
) -> exp.Expr | None:
    """Parse a data type, including nested (ARRAY/STRUCT/MAP), parameterized
    (DECIMAL(38, 0)), timezone-qualified, interval, and fixed-size array forms.

    Args:
        check_func: If a string follows a possibly-function-like type, retreat and
            return None so the name can be parsed as a function instead.
        schema: Whether we're parsing inside a schema definition (affects whether
            bracketed values after the type are consumed).
        allow_identifiers: Whether an identifier may be re-tokenized and treated
            as a type name (also enables user-defined types).

    Returns None (after retreating) whenever the tokens don't form a valid type.
    """
    index = self._index
    this: exp.Expr | None = None

    if self._match_set(self.TYPE_TOKENS):
        type_token = self._prev.token_type
    else:
        type_token = None
        identifier = allow_identifiers and self._parse_id_var(
            any_token=False, tokens=(TokenType.VAR,)
        )
        if isinstance(identifier, exp.Identifier):
            # Re-tokenize the identifier's text to see if it spells a type
            try:
                tokens = self.dialect.tokenize(identifier.name)
            except TokenError:
                tokens = None

            if tokens and (type_token := tokens[0].token_type) in self.TYPE_TOKENS:
                if len(tokens) > 1:
                    return exp.DataType.build(identifier.name, dialect=self.dialect)
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    this = self._parse_user_defined_type(identifier)
            else:
                self._retreat(self._index - 1)
                return None
        else:
            return None

    if type_token == TokenType.PSEUDO_TYPE:
        return self.expression(exp.PseudoType(this=self._prev.text.upper()))

    if type_token == TokenType.OBJECT_IDENTIFIER:
        return self.expression(exp.ObjectIdentifier(this=self._prev.text.upper()))

    # https://materialize.com/docs/sql/types/map/
    if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
        key_type = self._parse_types(
            check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
        )
        if not self._match(TokenType.FARROW):
            self._retreat(index)
            return None

        value_type = self._parse_types(
            check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
        )
        if not self._match(TokenType.R_BRACKET):
            self._retreat(index)
            return None

        return exp.DataType(
            this=exp.DType.MAP,
            expressions=[key_type, value_type],
            nested=True,
        )

    nested = type_token in self.NESTED_TYPE_TOKENS
    is_struct = type_token in self.STRUCT_TYPE_TOKENS
    is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
    expressions = None
    maybe_func = False

    # Parenthesized type arguments, e.g. DECIMAL(38, 0), ENUM('a', 'b'), ...
    if self._match(TokenType.L_PAREN):
        if is_struct:
            expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
        elif nested:
            expressions = self._parse_csv(
                lambda: self._parse_types(
                    check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                )
            )
            # ClickHouse's Nullable(T) unwraps into T with the nullable flag set
            if type_token == TokenType.NULLABLE and len(expressions) == 1:
                this = expressions[0]
                this.set("nullable", True)
                self._match_r_paren()
                return this
        elif type_token in self.ENUM_TYPE_TOKENS:
            expressions = self._parse_csv(self._parse_equality)
        elif type_token == TokenType.JSON:
            # ClickHouse JSON type supports arguments: JSON(col Type, SKIP col, param=value)
            # https://clickhouse.com/docs/sql-reference/data-types/newjson
            expressions = self._parse_csv(self._parse_json_type_arg)
        elif is_aggregate:
            func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
            )
            if not func_or_ident:
                return None
            expressions = [func_or_ident]
            if self._match(TokenType.COMMA):
                expressions.extend(
                    self._parse_csv(
                        lambda: self._parse_types(
                            check_func=check_func,
                            schema=schema,
                            allow_identifiers=allow_identifiers,
                        )
                    )
                )
        else:
            expressions = self._parse_csv(self._parse_type_size)

            # https://docs.snowflake.com/en/sql-reference/data-types-vector
            if type_token == TokenType.VECTOR and len(expressions) == 2:
                expressions = self._parse_vector_expressions(expressions)

        if not self._match(TokenType.R_PAREN):
            self._retreat(index)
            return None

        maybe_func = True

    values: list[exp.Expr] | None = None

    # Angle-bracket type arguments, e.g. ARRAY<INT>, STRUCT<a INT>
    if nested and self._match(TokenType.LT):
        if is_struct:
            expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
        else:
            expressions = self._parse_csv(
                lambda: self._parse_types(
                    check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                )
            )

        if not self._match(TokenType.GT):
            self.raise_error("Expecting >")

        # Optional inline constructor values, e.g. ARRAY<INT>[1, 2]
        if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
            values = self._parse_csv(self._parse_disjunction)
            if not values and is_struct:
                values = None
                self._retreat(self._index - 1)
            else:
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

    if type_token in self.TIMESTAMPS:
        if self._match_text_seq("WITH", "TIME", "ZONE"):
            maybe_func = False
            tz_type = exp.DType.TIMETZ if type_token in self.TIMES else exp.DType.TIMESTAMPTZ
            this = exp.DataType(this=tz_type, expressions=expressions)
        elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
            maybe_func = False
            this = exp.DataType(this=exp.DType.TIMESTAMPLTZ, expressions=expressions)
        elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
            maybe_func = False
    elif type_token == TokenType.INTERVAL:
        if self._curr.text.upper() in self.dialect.VALID_INTERVAL_UNITS:
            unit = self._parse_var(upper=True)
            if self._match_text_seq("TO"):
                unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

            this = self.expression(exp.DataType(this=self.expression(exp.Interval(unit=unit))))
        else:
            this = self.expression(exp.DataType(this=exp.DType.INTERVAL))
    elif type_token == TokenType.VOID:
        this = exp.DataType(this=exp.DType.NULL)

    # If the name could also be a function call, peek for a string argument:
    # finding one means it really is a function, so we back out entirely
    if maybe_func and check_func:
        index2 = self._index
        peek = self._parse_string()

        if not peek:
            self._retreat(index)
            return None

        self._retreat(index2)

    if not this:
        assert type_token is not None
        if self._match_text_seq("UNSIGNED"):
            unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
            if not unsigned_type_token:
                self.raise_error(f"Cannot convert {type_token.name} to unsigned.")

            type_token = unsigned_type_token or type_token

        # NULLABLE without parentheses can be a column (Presto/Trino)
        if type_token == TokenType.NULLABLE and not expressions:
            self._retreat(index)
            return None

        this = exp.DataType(
            this=exp.DType[type_token.name],
            expressions=expressions,
            nested=nested,
        )

        # Empty arrays/structs are allowed
        if values is not None:
            cls = exp.Struct if is_struct else exp.Array
            this = exp.cast(cls(expressions=values), this, copy=False)

    elif expressions:
        this.set("expressions", expressions)

    # https://materialize.com/docs/sql/types/list/#type-name
    while self._match(TokenType.LIST):
        this = exp.DataType(this=exp.DType.LIST, expressions=[this], nested=True)

    index = self._index

    # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
    matched_array = self._match(TokenType.ARRAY)

    while self._curr:
        datatype_token = self._prev.token_type
        matched_l_bracket = self._match(TokenType.L_BRACKET)

        if (not matched_l_bracket and not matched_array) or (
            datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
        ):
            # Postgres allows casting empty arrays such as ARRAY[]::INT[],
            # not to be confused with the fixed size array parsing
            break

        matched_array = False
        values = self._parse_csv(self._parse_disjunction) or None
        if (
            values
            and not schema
            and (
                not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS
                or datatype_token == TokenType.ARRAY
                or not self._match(TokenType.R_BRACKET, advance=False)
            )
        ):
            # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
            # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
            self._retreat(index)
            break

        this = exp.DataType(
            this=exp.DType.ARRAY, expressions=[this], values=values, nested=True
        )
        self._match(TokenType.R_BRACKET)

    # Let the dialect canonicalize the parsed type (e.g. DECIMAL -> DECIMAL(38, 0))
    if self.TYPE_CONVERTERS and isinstance(this.this, exp.DType):
        converter = self.TYPE_CONVERTERS.get(this.this)
        if converter:
            this = converter(t.cast(exp.DataType, this))

    return this
def _parse_json_type_arg(self) -> exp.Expr | None:
    """Parse one argument of ClickHouse's parameterized JSON type.

    Supported forms: ``SKIP col``, ``SKIP REGEXP 'pattern'``, ``name=value``
    parameters, and ``col_name Type`` hints.
    https://clickhouse.com/docs/sql-reference/data-types/newjson
    """
    # SKIP col or SKIP REGEXP 'pattern'
    if self._match_text_seq("SKIP"):
        is_regexp = self._match(TokenType.RLIKE)
        skipped = self._parse_column()
        if isinstance(skipped, exp.Column):
            skipped = skipped.to_dot()
        return self.expression(exp.SkipJSONColumn(regexp=is_regexp, expression=skipped))

    candidate = self._parse_column()
    if not isinstance(candidate, exp.Column):
        return None

    # Parameter: name=value (e.g., max_dynamic_paths=2)
    if len(candidate.parts) == 1 and self._match(TokenType.EQ):
        return self.expression(
            exp.EQ(this=exp.var(candidate.name), expression=self._parse_primary())
        )

    # Column type hint: col_name Type
    return self.expression(
        exp.ColumnDef(
            this=candidate.to_dot(),
            kind=self._parse_types(check_func=False, allow_identifiers=False),
        )
    )
def _parse_atom(self) -> exp.Expr | None:
    """Fast path for a single atomic operand: a column or a primary literal.

    Returns None — without consuming the primary token — when the upcoming
    tokens indicate a more complex construct that the caller must parse via
    the general machinery.

    NOTE(review): assumes both self._curr and self._next are non-None here —
    confirm callers only invoke this mid-token-stream.
    """
    if (
        self._curr.token_type in self.IDENTIFIER_TOKENS
        and (column := self._parse_column()) is not None
    ):
        return column

    token = self._curr
    token_type = token.token_type

    # No primary parser registered for this token -> not an atom
    if not (primary_parser := self.PRIMARY_PARSERS.get(token_type)):
        return None

    next_type = self._next.token_type

    # Bail out if the primary is followed by a column operator/postfix token,
    # or by another string (implicit string concatenation) — those need the
    # full parsing pipeline
    if (
        next_type in self.COLUMN_OPERATORS
        or next_type in self.COLUMN_POSTFIX_TOKENS
        or (token_type == TokenType.STRING and next_type == TokenType.STRING)
    ):
        return None

    self._advance()
    return primary_parser(self, token)
def _parse_column_parts_fast(self) -> exp.Column | exp.Dot | None:
    """Fast path for simple column and dot references (a, a.b, ...).

    Greedily consumes VAR/IDENTIFIER tokens separated by DOTs, then checks
    that nothing complex follows. If it does, retreats and returns None so
    the slow path can handle it. For >4 parts, wraps in exp.Dot nodes.
    """
    index = self._index
    parts: list[exp.Identifier] | None = None
    all_comments: list[str] | None = None

    while self._match_set(self.IDENTIFIER_TOKENS):
        token = self._prev
        comments = self._prev_comments

        # A leading name that is a no-paren function (e.g. CURRENT_DATE) must
        # go through the slow path
        if parts is None and token.text.upper() in self.NO_PAREN_FUNCTION_PARSERS:
            self._retreat(index)
            return None

        has_dot = self._match(TokenType.DOT)
        # NOTE(review): assumes self._curr is non-None after matching — confirm
        # the token stream always has a trailing token here
        curr_tt = self._curr.token_type

        if not has_dot:
            # Something complex follows (operator/postfix): defer to slow path
            if curr_tt in self.COLUMN_OPERATORS or curr_tt in self.COLUMN_POSTFIX_TOKENS:
                self._retreat(index)
                return None
        elif curr_tt not in self.IDENTIFIER_TOKENS:
            # A dot followed by a non-identifier (e.g. a star) is not simple
            self._retreat(index)
            return None

        if parts is None:
            parts = []

        # Bubble token comments up to the resulting column node
        if comments:
            if all_comments is None:
                all_comments = []
            all_comments.extend(comments)
            self._prev_comments = []

        parts.append(
            self.expression(
                exp.Identifier(
                    this=token.text, quoted=token.token_type == TokenType.IDENTIFIER
                ),
                token,
            )
        )

        if not has_dot:
            break

    if parts is None:
        return None

    n = len(parts)

    # Map up to 4 parts onto Column's name/table/db/catalog slots
    if n == 1:
        column: exp.Column | exp.Dot = exp.Column(this=parts[0])
    elif n == 2:
        column = exp.Column(this=parts[1], table=parts[0])
    elif n == 3:
        column = exp.Column(this=parts[2], table=parts[1], db=parts[0])
    else:
        column = exp.Column(this=parts[3], table=parts[2], db=parts[1], catalog=parts[0])

    # Any remaining parts become a chain of Dot nodes (no-op when n <= 4)
    for i in range(4, n):
        column = exp.Dot(this=column, expression=parts[i])

    if all_comments:
        column.add_comments(all_comments)

    return column
def _build_json_extract(
    self,
    this: exp.Expr | None,
    path_parts: list[exp.JSONPathPart],
    escape: bool | None,
) -> tuple[exp.Expr | None, list[exp.JSONPathPart]]:
    """Fold the accumulated JSON path parts into a JSONExtract over `this`.

    A path consisting of only the root is a no-op; otherwise wrap `this` and
    reset the accumulated path back to just the root.
    """
    if len(path_parts) <= 1:
        return this, path_parts

    json_path = exp.JSONPath(expressions=path_parts, escape=escape)
    extract = self.expression(
        exp.JSONExtract(
            this=this,
            expression=json_path,
            variant_extract=True,
            requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION,
        )
    )
    return extract, [exp.JSONPathRoot()]
def _parse_colon_as_variant_extract(self, this: exp.Expr | None) -> exp.Expr | None:
    """Parse Snowflake-style ``col:a.b[0]::type`` variant access into JSONExtract
    (and Bracket/Cast) nodes, accumulating path parts until they must be folded.
    """
    path_parts: list[exp.JSONPathPart] = [exp.JSONPathRoot()]
    # Set when any quoted key is seen, so the JSONPath preserves escaping
    escape = None

    while self._match(TokenType.COLON):
        key = self._parse_id_var(any_token=True, tokens=(TokenType.SELECT,))

        if key:
            if isinstance(key, exp.Identifier) and key.quoted:
                escape = True
            path_parts.append(exp.JSONPathKey(this=key.name))

        while True:
            if self._match(TokenType.DOT):
                next_key = self._parse_id_var(any_token=True, tokens=(TokenType.SELECT,))

                if next_key:
                    if isinstance(next_key, exp.Identifier) and next_key.quoted:
                        escape = True
                    path_parts.append(exp.JSONPathKey(this=next_key.name))
            elif self._match(TokenType.L_BRACKET):
                bracket_expr = self._parse_bracket_key_value()

                if not self._match(TokenType.R_BRACKET):
                    self.raise_error("Expected ]")

                if bracket_expr:
                    if bracket_expr.is_string:
                        path_parts.append(exp.JSONPathKey(this=bracket_expr.name))
                        escape = True
                    elif bracket_expr.is_star:
                        path_parts.append(exp.JSONPathSubscript(this=exp.JSONPathWildcard()))
                    elif bracket_expr.is_number:
                        path_parts.append(exp.JSONPathSubscript(this=bracket_expr.to_py()))
                    else:
                        # Dynamic subscript: fold the path so far, then index it
                        this, path_parts = self._build_json_extract(this, path_parts, escape)
                        escape = None

                        this = self.expression(
                            exp.Bracket(
                                this=this, expressions=[bracket_expr], json_access=True
                            ),
                        )

            elif self._match(TokenType.DCOLON):
                # ::type casts whatever path has been accumulated so far
                this, path_parts = self._build_json_extract(this, path_parts, escape)
                escape = None

                cast_type = self._parse_types()
                if cast_type:
                    this = self.expression(exp.Cast(this=this, to=cast_type))
                else:
                    self.raise_error("Expected type after '::'")
            else:
                break

    # Fold any remaining path parts into a final JSONExtract
    this, _ = self._build_json_extract(this, path_parts, escape)

    return this
def _parse_column_ops(self, this: exp.Expr | None) -> exp.Expr | None:
    """Parse trailing column operators on `this`: brackets, dots, casts (::)
    and any dialect-specific COLUMN_OPERATORS, left-associatively.
    """
    # Leading bracket accesses, e.g. x[0][1]
    while self._curr.token_type in self.BRACKETS:
        this = self._parse_bracket(this)

    # Hoist the mapping lookups out of the loop
    column_operators = self.COLUMN_OPERATORS
    cast_column_operators = self.CAST_COLUMN_OPERATORS
    while self._curr:
        op_token = self._curr.token_type

        if op_token not in column_operators:
            break
        op = column_operators[op_token]
        self._advance()

        if op_token in cast_column_operators:
            field = self._parse_dcolon()
            if not field:
                self.raise_error("Expected type")
        elif op and self._curr:
            field = self._parse_column_reference() or self._parse_bitwise()
            if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False):
                field = self._parse_column_ops(field)
        else:
            # Plain dot: the next part may be any token, including a function
            field = self._parse_field(any_token=True, anonymous_func=True)

        # Function calls can be qualified, e.g., x.y.FOO()
        # This converts the final AST to a series of Dots leading to the function call
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
        if isinstance(field, (exp.Func, exp.Window)) and this:
            this = this.transform(
                lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n
            )

        if op:
            this = op(self, this, field)
        elif isinstance(this, exp.Column) and not this.args.get("catalog"):
            # Shift the column's qualifiers one slot deeper to absorb the new part
            this = self.expression(
                exp.Column(
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                ),
                comments=this.comments,
            )
        elif isinstance(field, exp.Window):
            # Move the exp.Dot's to the window's function
            window_func = self.expression(exp.Dot(this=this, expression=field.this))
            field.set("this", window_func)
            this = field
        else:
            this = self.expression(exp.Dot(this=this, expression=field))

        if field and field.comments:
            t.cast(exp.Expr, this).add_comments(field.pop_comments())

        this = self._parse_bracket(this)

    return this
def _parse_primary(self) -> exp.Expr | None:
    """Parse a primary expression: a literal, a leading-dot number (.5), or a
    parenthesized expression."""
    if self._match_set(self.PRIMARY_PARSERS):
        tt = self._prev.token_type
        node = self.PRIMARY_PARSERS[tt](self, self._prev)

        # Adjacent string literals ('a' 'b') are implicitly concatenated
        if tt == TokenType.STRING:
            pieces = [node]
            while self._match(TokenType.STRING):
                pieces.append(exp.Literal.string(self._prev.text))

            if len(pieces) > 1:
                node = self.expression(
                    exp.Concat(expressions=pieces, coalesce=self.dialect.CONCAT_COALESCE)
                )

        return node

    # Numbers such as ".5" tokenize as DOT NUMBER; normalize to "0.5"
    if self._match_pair(TokenType.DOT, TokenType.NUMBER):
        return exp.Literal.number(f"0.{self._prev.text}")

    return self._parse_paren()
def _parse_function(
    self,
    functions: dict[str, t.Callable] | None = None,
    anonymous: bool = False,
    optional_parens: bool = True,
    any_token: bool = False,
) -> exp.Expr | None:
    """Parse a function call, optionally wrapped in the ODBC {fn <function>}
    escape syntax (Snowflake, MySQL support this).

    See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
    """
    wrapped_in_fn = (
        self._match(TokenType.L_BRACE, advance=False)
        and self._next
        and self._next.text.upper() == "FN"
    )
    if wrapped_in_fn:
        self._advance(2)

    func = self._parse_function_call(
        functions=functions,
        anonymous=anonymous,
        optional_parens=optional_parens,
        any_token=any_token,
    )

    if wrapped_in_fn:
        self._match(TokenType.R_BRACE)

    return func
def _parse_function_call(
    self,
    functions: dict[str, t.Callable] | None = None,
    anonymous: bool = False,
    optional_parens: bool = True,
    any_token: bool = False,
) -> exp.Expr | None:
    """Parse a function invocation at the current token.

    Args:
        functions: Name -> builder mapping to use (defaults to self.FUNCTIONS).
        anonymous: Build an exp.Anonymous node even for known functions.
        optional_parens: Allow no-paren functions such as CURRENT_DATE.
        any_token: Accept any non-reserved token as a function name.

    Returns None if the tokens at hand don't form a function call.
    """
    if not self._curr:
        return None

    comments = self._curr.comments
    prev = self._prev
    token = self._curr
    token_type = self._curr.token_type
    this: str | exp.Expr = self._curr.text
    upper = self._curr.text.upper()

    # Special no-paren constructs with dedicated parsers (e.g. CASE)
    parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
    if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
        self._advance()
        return self._parse_window(parser(self))

    if self._next.token_type != TokenType.L_PAREN:
        if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
            self._advance()
            return self.expression(self.NO_PAREN_FUNCTIONS[token_type]())

        return None

    if any_token:
        if token_type in self.RESERVED_TOKENS:
            return None
    elif token_type not in self.FUNC_TOKENS:
        return None

    # Consume the function name and the opening paren
    self._advance(2)

    parser = self.FUNCTION_PARSERS.get(upper)
    if parser and not anonymous:
        result = parser(self)
    else:
        subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

        if subquery_predicate:
            expr = None
            if self._curr.token_type in self.SUBQUERY_TOKENS:
                expr = self._parse_select()
                self._match_r_paren()
            elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE):
                # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like
                # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren
                self._advance(-1)
                expr = self._parse_bitwise()

            if expr:
                return self.expression(subquery_predicate(this=expr), comments=comments)

        if functions is None:
            functions = self.FUNCTIONS

        function = functions.get(upper)
        known_function = function and not anonymous

        alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
        args = self._parse_function_args(alias)

        post_func_comments = self._curr.comments if self._curr else None
        if known_function and post_func_comments:
            # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
            # call we'll construct it as exp.Anonymous, even if it's "known"
            if any(
                comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                for comment in post_func_comments
            ):
                known_function = False

        if alias and known_function:
            args = self._kv_to_prop_eq(args)

        if known_function:
            func_builder = t.cast(t.Callable, function)

            # mypyc compiled functions don't have __code__, so we use
            # try/except to check if func_builder accepts 'dialect'.
            try:
                func = func_builder(args)
            except TypeError:
                func = func_builder(args, dialect=self.dialect)

            func = self.validate_expression(func, args)
            if self.dialect.PRESERVE_ORIGINAL_NAMES:
                func.meta["name"] = this

            result = func
        else:
            if token_type == TokenType.IDENTIFIER:
                this = exp.Identifier(this=this, quoted=True).update_positions(token)

            result = self.expression(exp.Anonymous(this=this, expressions=args))

        result = result.update_positions(token)

    if isinstance(result, exp.Expr):
        result.add_comments(comments)

    if parser:
        self._match(TokenType.R_PAREN, expression=result)
    else:
        self._match_r_paren(result)
    return self._parse_window(result)
def _kv_to_prop_eq(
    self, expressions: list[exp.Expr], parse_map: bool = False
) -> list[exp.Expr]:
    """Normalize key-value style arguments (aliases, EQ-like nodes) into
    exp.PropertyEQ nodes; other arguments are delegated to _to_prop_eq."""
    normalized: list[exp.Expr] = []

    for position, node in enumerate(expressions):
        if not isinstance(node, self.KEY_VALUE_DEFINITIONS):
            normalized.append(self._to_prop_eq(node, position))
            continue

        # An alias `value AS key` carries the key in its "alias" arg
        if isinstance(node, exp.Alias):
            node = self.expression(
                exp.PropertyEQ(this=node.args.get("alias"), expression=node.this)
            )

        if not isinstance(node, exp.PropertyEQ):
            key = node.this if parse_map else exp.to_identifier(node.this.name)
            node = self.expression(exp.PropertyEQ(this=key, expression=node.expression))

        # Unwrap a Column key down to its underlying identifier
        if isinstance(node.this, exp.Column):
            node.this.replace(node.this.this)

        normalized.append(node)

    return normalized
computed_column=False) 6921 6922 def _parse_user_defined_function(self, kind: TokenType | None = None) -> exp.Expr | None: 6923 this = self._parse_table_parts(schema=True) 6924 6925 if not self._match(TokenType.L_PAREN): 6926 return this 6927 6928 expressions = self._parse_csv(self._parse_function_parameter) 6929 self._match_r_paren() 6930 return self.expression( 6931 exp.UserDefinedFunction(this=this, expressions=expressions, wrapped=True) 6932 ) 6933 6934 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 6935 literal = self._parse_primary() 6936 if literal: 6937 return self.expression(exp.Introducer(this=token.text, expression=literal), token) 6938 6939 return self._identifier_expression(token) 6940 6941 def _parse_session_parameter(self) -> exp.SessionParameter: 6942 kind = None 6943 this = self._parse_id_var() or self._parse_primary() 6944 6945 if this and self._match(TokenType.DOT): 6946 kind = this.name 6947 this = self._parse_var() or self._parse_primary() 6948 6949 return self.expression(exp.SessionParameter(this=this, kind=kind)) 6950 6951 def _parse_lambda_arg(self) -> exp.Expr | None: 6952 return self._parse_id_var() 6953 6954 def _parse_lambda(self, alias: bool = False) -> exp.Expr | None: 6955 next_token_type = self._next.token_type 6956 6957 # Fast path: simple atom (column, literal, null, bool) followed by , or ) 6958 if ( 6959 next_token_type in self.LAMBDA_ARG_TERMINATORS 6960 and (atom := self._parse_atom()) is not None 6961 ): 6962 return atom 6963 6964 index = self._index 6965 6966 if self._match(TokenType.L_PAREN): 6967 expressions = t.cast( 6968 list[t.Optional[exp.Expr]], self._parse_csv(self._parse_lambda_arg) 6969 ) 6970 6971 if not self._match(TokenType.R_PAREN): 6972 self._retreat(index) 6973 elif self._match_set(self.LAMBDAS): 6974 return self.LAMBDAS[self._prev.token_type](self, expressions) 6975 else: 6976 self._retreat(index) 6977 elif self.TYPED_LAMBDA_ARGS or next_token_type in self.LAMBDAS: 6978 
            # Single unparenthesized lambda argument, e.g. `x -> x + 1`.
            expressions = [self._parse_lambda_arg()]

            if self._match_set(self.LAMBDAS):
                return self.LAMBDAS[self._prev.token_type](self, expressions)

            self._retreat(index)

        this: exp.Expr | None

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct(expressions=self._parse_csv(self._parse_disjunction))
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        # Aggregate-argument modifiers may trail the expression in either order.
        return self._parse_limit(
            self._parse_respect_or_ignore_nulls(
                self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
            )
        )

    def _parse_schema(self, this: exp.Expr | None = None) -> exp.Expr | None:
        """Parse a parenthesized column/constraint list into exp.Schema, if one follows."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema(this=this, expressions=args))

    def _parse_field_def(self) -> exp.Expr | None:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(
        self, this: exp.Expr | None, computed_column: bool = True
    ) -> exp.Expr | None:
        """Parse a column definition: name, optional type, and any constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        if not computed_column:
            self._match(TokenType.ALIAS)

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef(this=this, ordinality=True))

        constraints: list[exp.Expr] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            # ClickHouse-style computed column: ALIAS / MATERIALIZED <expr>.
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_disjunction(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                data_type=exp.Var(this="AUTO")
                if self._match_text_seq("AUTO")
                else self._parse_types(),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint(kind=constraint_kind)))
        elif not kind and self._match_set({TokenType.IN, TokenType.OUT}, advance=False):
            # Procedure parameter direction (IN / OUT / both) before the type.
            in_out_constraint = self.expression(
                exp.InOutColumnConstraint(
                    input_=self._match(TokenType.IN), output=self._match(TokenType.OUT)
                )
            )
            constraints.append(in_out_constraint)
            kind = self._parse_types()
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or self._next.token_type == TokenType.L_PAREN
            )
        ):
            # `<type> AS (<expr>) [STORED | VIRTUAL]` computed column.
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint(
                        kind=exp.ComputedColumnConstraint(
                            this=self._parse_disjunction(),
                            persisted=self._match_texts(("STORED", "VIRTUAL"))
                            and self._prev.text.upper() == "STORED",
                        )
                    )
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            # Bare identifier: nothing column-def-like was found.
            return this

        return self.expression(exp.ColumnDef(this=this, kind=kind, constraints=constraints))

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT args."""
        start = None
        increment = None
        order = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            # Snowflake-style: START <n> INCREMENT <n> [ORDER | NOORDER].
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()
            if self._match_text_seq("ORDER"):
                order = True
            elif self._match_text_seq("NOORDER"):
                order = False

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(
                start=start, increment=increment, this=False, order=order
            )

        return exp.AutoIncrementColumnConstraint()

    def _parse_check_constraint(self) -> exp.CheckColumnConstraint | None:
        """Parse `CHECK (<condition>) [ENFORCED]`; None if no paren follows."""
        if not self._match(TokenType.L_PAREN, advance=False):
            return None

        return self.expression(
            exp.CheckColumnConstraint(
                this=self._parse_wrapped(self._parse_assignment),
                enforced=self._match_text_seq("ENFORCED"),
            )
        )

    def _parse_auto_property(self) -> exp.AutoRefreshProperty | None:
        # AUTO REFRESH <value>; retreat past the consumed AUTO keyword otherwise.
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty(this=self._parse_var(upper=True)))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        # COMPRESS may take a parenthesized list or a single expression.
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint(this=self._parse_wrapped_csv(self._parse_bitwise))
            )

        return self.expression(exp.CompressColumnConstraint(this=self._parse_bitwise()))

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse `GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY | ROW | <expr>} ...`."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint(this=False, on_null=on_null)
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint(this=True))

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # GENERATED ALWAYS AS ROW {START | END} [HIDDEN] (system versioning).
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint(start=start, hidden=hidden))

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            # Optional sequence options inside the parens.
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>): a computed expression, not an identity.
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare `(start, increment)` shorthand, e.g. T-SQL IDENTITY(1, 1).
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint(this=self._parse_bitwise()))

    def _parse_not_constraint(self) -> exp.Expr | None:
        """Parse constraints that begin with NOT (NULL, CASESPECIFIC, FOR REPLICATION)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint())
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint(not_=True))
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint())

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> exp.Expr | None:
        """Parse a single (optionally named) column constraint, or None."""
        this = self._parse_id_var() if self._match(TokenType.CONSTRAINT) else None

        # `WITH <procedure option>` must not be mistaken for a WITH-prefixed constraint.
        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            constraint = self.CONSTRAINT_PARSERS[self._prev.text.upper()](self)
            if not constraint:
                # The keyword didn't pan out; give it back to the token stream.
                self._retreat(self._index - 1)
                return None

            return self.expression(exp.ColumnConstraint(this=this, kind=constraint))

        return this

    def _parse_constraint(self) -> exp.Expr | None:
        # Named constraint (`CONSTRAINT name ...`) or a schema-level unnamed one.
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint(this=self._parse_id_var(), expressions=self._parse_unnamed_constraints())
        )

    def _parse_unnamed_constraints(self) -> list[exp.Expr]:
        """Collect consecutive unnamed constraints (or constraint-like function calls)."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Collection[str] | None = None
    ) -> exp.Expr | None:
        """Parse one unnamed constraint from `constraints` (defaults to CONSTRAINT_PARSERS)."""
        index = self._index

        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint_key = self._prev.text.upper()
        if constraint_key not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint_key}.")

        result = self.CONSTRAINT_PARSERS[constraint_key](self)
        if not result:
            self._retreat(index)

        return result

    def _parse_unique_key(self) -> exp.Expr | None:
        # Don't consume a constraint keyword as the key's name.
        if (
            self._curr
            and self._curr.token_type != TokenType.IDENTIFIER
            and self._curr.text.upper() in self.CONSTRAINT_PARSERS
        ):
            return None
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse `UNIQUE [KEY | INDEX] ...` with its optional modifiers."""
        self._match_texts(("KEY", "INDEX"))
        return self.expression(
            exp.UniqueColumnConstraint(
                nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
                this=self._parse_schema(self._parse_unique_key()),
                index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
                on_conflict=self._parse_on_conflict(),
                options=self._parse_key_constraint_options(),
            )
        )

    def _parse_key_constraint_options(self) -> list[str]:
        """Collect trailing key-constraint options as plain strings (e.g. `ON DELETE CASCADE`)."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (e.g. DELETE / UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> exp.Reference | None:
        """Parse `REFERENCES <table> [options]`; `match=False` assumes REFERENCES was consumed."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions: list | None = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference(this=this, expressions=expressions, options=options))

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse `FOREIGN KEY [(cols)] REFERENCES ... [ON DELETE/UPDATE <action>]*`."""
        expressions = (
            self._parse_wrapped_id_vars()
            if not self._match(TokenType.REFERENCES, advance=False)
            else None
        )
        reference = self._parse_references()
        on_options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            # Keyed as "delete"/"update" and splatted into exp.ForeignKey below.
            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            on_options[kind] = action

        return self.expression(
            exp.ForeignKey(
                expressions=expressions,
                reference=reference,
                options=self._parse_key_constraint_options(),
                **on_options,
            )
        )

    def _parse_primary_key_part(self) -> exp.Expr | None:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint | None:
        # PERIOD FOR SYSTEM_TIME (start_col, end_col); retreat if SYSTEM_TIME is absent.
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint(
                this=seq_get(id_vars, 0), expression=seq_get(id_vars, 1)
            )
        )

    def _parse_primary_key(
        self,
        wrapped_optional: bool = False,
        in_props: bool = False,
        named_primary_key: bool = False,
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as a column constraint or a table-level key with columns."""
        desc = (
            self._prev.token_type == TokenType.DESC
            if self._match_set((TokenType.ASC, TokenType.DESC))
            else None
        )

        this = None
        if (
            named_primary_key
            and self._curr.text.upper() not in self.CONSTRAINT_PARSERS
            and self._next
            and self._next.token_type == TokenType.L_PAREN
        ):
            this = self._parse_id_var()

        # No column list follows -> this is a column-level constraint.
        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.PrimaryKeyColumnConstraint(
                    desc=desc, options=self._parse_key_constraint_options()
                )
            )

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )

        return self.expression(
            exp.PrimaryKey(
                this=this,
                expressions=expressions,
                include=self._parse_index_params(),
                options=self._parse_key_constraint_options(),
            )
        )

    def _parse_bracket_key_value(self, is_map: bool = False) -> exp.Expr | None:
        return self._parse_slice(self._parse_alias(self._parse_disjunction(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expr:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
        https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class(this=self._parse_string()))
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: exp.Expr | None = None) -> exp.Expr | None:
        """Parse `[...]` / `{...}` following `this`: subscript, array literal, or struct."""
        if not self._match_set(self.BRACKETS):
            return this

        if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS:
            # Peek at the token before the bracket to see if this is MAP {...}.
            map_token = seq_get(self._tokens, self._index - 2)
            parse_map = map_token is not None and map_token.text.upper() == "MAP"
        else:
            parse_map = False

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(
                exp.Struct(
                    expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map)
                )
            )
        elif not this:
            # Bare `[...]` is an array literal.
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            # Otherwise it's a subscript; normalize indices by the dialect's offset.
            expressions = apply_index_offset(
                this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
            )
            this = self.expression(
                exp.Bracket(this=this, expressions=expressions), comments=this.pop_comments()
            )

        self._add_comments(this)
        # Brackets can chain, e.g. a[1][2].
        return self._parse_bracket(this)

    def _parse_slice(self, this: exp.Expr | None) -> exp.Expr | None:
        """Parse a slice suffix `:end[:step]` after `this`, if a colon follows."""
        if not self._match(TokenType.COLON):
            return this

        if self._match_pair(TokenType.DASH, TokenType.COLON, advance=False):
            # `x:-:step` shorthand: treat the dash as end index -1.
            self._advance()
            end: exp.Expr | None = -exp.Literal.number("1")
        else:
            end = self._parse_assignment()
        step = self._parse_unary() if self._match(TokenType.COLON) else None
        return self.expression(exp.Slice(this=this, expression=end, step=step))

    def _parse_case(self) -> exp.Expr | None:
        """Parse a CASE expression (both simple and searched forms)."""
        if self._match(TokenType.DOT, advance=False):
            # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake
            self._retreat(self._index - 1)
            return None

        ifs = []
        default = None

        comments = self._prev_comments
        # Operand of a simple CASE; None for the searched form.
        expression = self._parse_disjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_disjunction()
            self._match(TokenType.THEN)
            then = self._parse_disjunction()
            ifs.append(self.expression(exp.If(this=this, true=then)))

        if self._match(TokenType.ELSE):
            default = self._parse_disjunction()

        if not self._match(TokenType.END):
            # `... ELSE interval END` can swallow END as an interval unit; undo that.
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case(this=expression, ifs=ifs, default=default), comments=comments
        )

    def _parse_if(self) -> exp.Expr | None:
        """Parse IF as either a function call `IF(...)` or `IF cond THEN ... END`."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(
                lambda: self._parse_alias(self._parse_assignment(), explicit=True)
            )
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A statement-initial unparenthesized IF is a command, not an expression.
                return self._parse_as_command(self._prev)

            condition = self._parse_disjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_disjunction()
            false = self._parse_disjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If(this=condition, true=true, false=false))

        return this

    def _parse_next_value_for(self) -> exp.Expr | None:
        # NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; retreat past NEXT otherwise.
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor(
                this=self._parse_column(),
                order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
            )
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        """Parse EXTRACT(part FROM expr) — comma is accepted in place of FROM."""
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract(this=this, expression=self._parse_bitwise()))

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract(this=this, expression=self._parse_bitwise()))

    def _parse_gap_fill(self) -> exp.GapFill:
        """Parse GAP_FILL(TABLE <table>, <args...>)."""
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_char(self) -> exp.Chr:
        # CHAR(expr, ... [USING charset]).
        return self.expression(
            exp.Chr(
                expressions=self._parse_csv(self._parse_assignment),
                charset=self._match(TokenType.USING) and self._parse_var(),
            )
        )

    def _parse_cast(self, strict: bool, safe: bool | None = None) -> exp.Expr:
        """Parse the body of CAST/TRY_CAST: `expr AS type [DEFAULT ...] [FORMAT ...]`."""
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two-argument form casts to a type given as a string.
                return self.expression(exp.CastToStrType(this=this, to=self._parse_string()))

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        default = None
        if self._match(TokenType.DEFAULT):
            # Oracle: DEFAULT <value> ON CONVERSION ERROR.
            default = self._parse_bitwise()
            self._match_text_seq("ON", "CONVERSION", "ERROR")

        if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
            fmt_string = self._parse_wrapped(self._parse_string, optional=True)
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DType.UNKNOWN.into_expr()
            # A FORMAT cast to a temporal type canonicalizes to STR_TO_DATE/STR_TO_TIME.
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    (exp.StrToDate if to.this == exp.DType.DATE else exp.StrToTime)(
                        this=this,
                        format=exp.Literal.string(
                            format_time(
                                fmt_string.this if fmt_string else "",
                                self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                                self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                            )
                        ),
                        safe=safe,
                    )
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unrecognized type name: treat it as a user-defined type.
            to = exp.DataType.build(to.name, dialect=self.dialect, udt=True)
        elif to.this == exp.DType.CHAR and self._match(TokenType.CHARACTER_SET):
            to = exp.DType.CHARACTER_SET.into_expr(kind=self._parse_var_or_string())

        return self.build_cast(
            strict=strict,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
            default=default,
        )

    def _parse_string_agg(self) -> exp.GroupConcat:
        """Parse STRING_AGG/LISTAGG variants into a canonical exp.GroupConcat."""
        if self._match(TokenType.DISTINCT):
            args: list[exp.Expr | None] = [
                self.expression(exp.Distinct(expressions=[self._parse_disjunction()]))
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_disjunction))
        else:
            args = self._parse_csv(self._parse_disjunction)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: exp.Expr | None = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior(
                        this=self._parse_string(),
                        with_count=(
                            self._match_text_seq("WITH", "COUNT")
                            or not self._match_text_seq("WITHOUT", "COUNT")
                        ),
                    )
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
            args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
            return self.expression(exp.GroupConcat(this=args[0], separator=seq_get(args, 1)))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat(
                this=self._parse_order(this=seq_get(args, 0)),
                separator=seq_get(args, 1),
                on_overflow=on_overflow,
            )
        )

    def _parse_convert(self, strict: bool, safe: bool | None = None) -> exp.Expr | None:
        """Parse CONVERT's argument list: `expr USING charset` or `expr, type`."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: exp.Expr | None = exp.DType.CHARACTER_SET.into_expr(
                kind=self._parse_var(tokens={TokenType.BINARY})
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.build_cast(strict=strict, this=this, to=to, safe=safe)

    def _parse_xml_element(self) -> exp.XMLElement:
        """Parse XMLELEMENT(NAME id | EVALNAME expr [, args...])."""
        if self._match_text_seq("EVALNAME"):
            evalname = True
            this = self._parse_bitwise()
        else:
            evalname = None
            self._match_text_seq("NAME")
            this = self._parse_id_var()

        return self.expression(
            exp.XMLElement(
                this=this,
                expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_bitwise),
                evalname=evalname,
            )
        )

    def _parse_xml_table(self) -> exp.XMLTable:
        """Parse XMLTABLE([XMLNAMESPACES(...),] 'xpath' [PASSING ...] [COLUMNS ...])."""
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable(
                this=this, namespaces=namespaces, passing=passing, columns=columns, by_ref=by_ref
            )
        )

    def _parse_xml_namespace(self) -> list[exp.XMLNamespace]:
        """Parse a comma-separated XMLNAMESPACES list (`'uri' AS alias` or `DEFAULT 'uri'`)."""
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace(this=uri)))
            if not self._match(TokenType.COMMA):
                break

        return namespaces

    def _parse_decode(self) -> exp.Decode | exp.DecodeCase | None:
        # With fewer than 3 args DECODE is charset decoding; otherwise it's the
        # Oracle-style conditional (CASE-like) form.
        args = self._parse_csv(self._parse_disjunction)

        if len(args) < 3:
            return self.expression(exp.Decode(this=seq_get(args, 0), charset=seq_get(args, 1)))

        return self.expression(exp.DecodeCase(expressions=args))

    def _parse_json_key_value(self) -> exp.JSONKeyValue | None:
        """Parse one `[KEY] k <sep> [VALUE] v` pair inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue(this=key, expression=value))

    def _parse_format_json(self, this: exp.Expr | None) -> exp.Expr | None:
        # Wrap `this` when it is followed by a FORMAT JSON clause.
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson(this=this))

    def _parse_on_condition(self) -> exp.OnCondition | None:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(exp.OnCondition(empty=empty, error=error, null=null))

    def _parse_on_handling(self, on: str, *values: str) -> str | None | exp.Expr | None:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: t.Literal[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: t.Literal[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT / JSON_OBJECTAGG arguments and modifiers."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        # WITH/WITHOUT UNIQUE [KEYS] duplicate-key handling.
        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            (exp.JSONObjectAgg if agg else exp.JSONObject)(
                expressions=expressions,
                null_handling=null_handling,
                unique_keys=unique_keys,
                return_type=return_type,
                encoding=encoding,
            )
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            ordinality = self._match_pair(TokenType.FOR, TokenType.ORDINALITY)
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            ordinality = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef(
                this=this, kind=kind, path=path, nested_schema=nested_schema, ordinality=ordinality
            )
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        # COLUMNS (<json column defs>) clause of JSON_TABLE.
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema(
expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True) 7915 ) 7916 ) 7917 7918 def _parse_json_table(self) -> exp.JSONTable: 7919 this = self._parse_format_json(self._parse_bitwise()) 7920 path = self._match(TokenType.COMMA) and self._parse_string() 7921 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 7922 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 7923 schema = self._parse_json_schema() 7924 7925 return exp.JSONTable( 7926 this=this, 7927 schema=schema, 7928 path=path, 7929 error_handling=error_handling, 7930 empty_handling=empty_handling, 7931 ) 7932 7933 def _parse_match_against(self) -> exp.MatchAgainst: 7934 if self._match_text_seq("TABLE"): 7935 # parse SingleStore MATCH(TABLE ...) syntax 7936 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 7937 expressions = [] 7938 table = self._parse_table() 7939 if table: 7940 expressions = [table] 7941 else: 7942 expressions = self._parse_csv(self._parse_column) 7943 7944 self._match_text_seq(")", "AGAINST", "(") 7945 7946 this = self._parse_string() 7947 7948 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 7949 modifier = "IN NATURAL LANGUAGE MODE" 7950 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 7951 modifier = f"{modifier} WITH QUERY EXPANSION" 7952 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 7953 modifier = "IN BOOLEAN MODE" 7954 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 7955 modifier = "WITH QUERY EXPANSION" 7956 else: 7957 modifier = None 7958 7959 return self.expression( 7960 exp.MatchAgainst(this=this, expressions=expressions, modifier=modifier) 7961 ) 7962 7963 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 7964 def _parse_open_json(self) -> exp.OpenJSON: 7965 this = self._parse_bitwise() 7966 path = self._match(TokenType.COMMA) and self._parse_string() 7967 7968 def _parse_open_json_column_def() -> 
exp.OpenJSONColumnDef: 7969 this = self._parse_field(any_token=True) 7970 kind = self._parse_types() 7971 path = self._parse_string() 7972 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 7973 7974 return self.expression( 7975 exp.OpenJSONColumnDef(this=this, kind=kind, path=path, as_json=as_json) 7976 ) 7977 7978 expressions = None 7979 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 7980 self._match_l_paren() 7981 expressions = self._parse_csv(_parse_open_json_column_def) 7982 7983 return self.expression(exp.OpenJSON(this=this, path=path, expressions=expressions)) 7984 7985 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 7986 args = self._parse_csv(self._parse_bitwise) 7987 7988 if self._match(TokenType.IN): 7989 return self.expression( 7990 exp.StrPosition(this=self._parse_bitwise(), substr=seq_get(args, 0)) 7991 ) 7992 7993 if haystack_first: 7994 haystack = seq_get(args, 0) 7995 needle = seq_get(args, 1) 7996 else: 7997 haystack = seq_get(args, 1) 7998 needle = seq_get(args, 0) 7999 8000 return self.expression( 8001 exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2)) 8002 ) 8003 8004 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 8005 args = self._parse_csv(self._parse_table) 8006 return exp.JoinHint(this=func_name.upper(), expressions=args) 8007 8008 def _parse_substring(self) -> exp.Substring: 8009 # Postgres supports the form: substring(string [from int] [for int]) 8010 # (despite being undocumented, the reverse order also works) 8011 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 8012 8013 args = t.cast(list[t.Optional[exp.Expr]], self._parse_csv(self._parse_bitwise)) 8014 8015 start, length = None, None 8016 8017 while self._curr: 8018 if self._match(TokenType.FROM): 8019 start = self._parse_bitwise() 8020 elif self._match(TokenType.FOR): 8021 if not start: 8022 start = exp.Literal.number(1) 8023 length = self._parse_bitwise() 8024 else: 8025 break 
8026 8027 if start: 8028 args.append(start) 8029 if length: 8030 args.append(length) 8031 8032 return self.validate_expression(exp.Substring.from_arg_list(args), args) 8033 8034 def _parse_trim(self) -> exp.Trim: 8035 # https://www.w3resource.com/sql/character-functions/trim.php 8036 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 8037 8038 position = None 8039 collation = None 8040 expression = None 8041 8042 if self._match_texts(self.TRIM_TYPES): 8043 position = self._prev.text.upper() 8044 8045 this = self._parse_bitwise() 8046 if self._match_set((TokenType.FROM, TokenType.COMMA)): 8047 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 8048 expression = self._parse_bitwise() 8049 8050 if invert_order: 8051 this, expression = expression, this 8052 8053 if self._match(TokenType.COLLATE): 8054 collation = self._parse_bitwise() 8055 8056 return self.expression( 8057 exp.Trim(this=this, position=position, expression=expression, collation=collation) 8058 ) 8059 8060 def _parse_window_clause(self) -> list[exp.Expr] | None: 8061 return self._parse_csv(self._parse_named_window) if self._match(TokenType.WINDOW) else None 8062 8063 def _parse_named_window(self) -> exp.Expr | None: 8064 return self._parse_window(self._parse_id_var(), alias=True) 8065 8066 def _parse_respect_or_ignore_nulls(self, this: exp.Expr | None) -> exp.Expr | None: 8067 if self._curr.token_type == TokenType.VAR: 8068 if self._match_text_seq("IGNORE", "NULLS"): 8069 return self.expression(exp.IgnoreNulls(this=this)) 8070 if self._match_text_seq("RESPECT", "NULLS"): 8071 return self.expression(exp.RespectNulls(this=this)) 8072 return this 8073 8074 def _parse_having_max(self, this: exp.Expr | None) -> exp.Expr | None: 8075 if self._match(TokenType.HAVING): 8076 self._match_texts(("MAX", "MIN")) 8077 max = self._prev.text.upper() != "MIN" 8078 return self.expression( 8079 exp.HavingMax(this=this, expression=self._parse_column(), max=max) 8080 ) 8081 8082 
return this 8083 8084 def _parse_window(self, this: exp.Expr | None, alias: bool = False) -> exp.Expr | None: 8085 func = this 8086 comments = func.comments if isinstance(func, exp.Expr) else None 8087 8088 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 8089 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 8090 if self._match_text_seq("WITHIN", "GROUP"): 8091 order = self._parse_wrapped(self._parse_order) 8092 this = self.expression(exp.WithinGroup(this=this, expression=order)) 8093 8094 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 8095 self._match(TokenType.WHERE) 8096 this = self.expression( 8097 exp.Filter(this=this, expression=self._parse_where(skip_where_token=True)) 8098 ) 8099 self._match_r_paren() 8100 8101 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 8102 # Some dialects choose to implement and some do not. 8103 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 8104 8105 # There is some code above in _parse_lambda that handles 8106 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 8107 8108 # The below changes handle 8109 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 8110 8111 # Oracle allows both formats 8112 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 8113 # and Snowflake chose to do the same for familiarity 8114 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 8115 if isinstance(this, exp.AggFunc): 8116 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 8117 8118 if ignore_respect and ignore_respect is not this: 8119 ignore_respect.replace(ignore_respect.this) 8120 this = self.expression(ignore_respect.__class__(this=this)) 8121 8122 this = self._parse_respect_or_ignore_nulls(this) 8123 8124 # bigquery select from window x AS (partition by ...) 
8125 if alias: 8126 over = None 8127 self._match(TokenType.ALIAS) 8128 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 8129 return this 8130 else: 8131 over = self._prev.text.upper() 8132 8133 if comments and isinstance(func, exp.Expr): 8134 func.pop_comments() 8135 8136 if not self._match(TokenType.L_PAREN): 8137 return self.expression( 8138 exp.Window(this=this, alias=self._parse_id_var(False), over=over), comments=comments 8139 ) 8140 8141 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 8142 8143 first: bool | None = True if self._match(TokenType.FIRST) else None 8144 if self._match_text_seq("LAST"): 8145 first = False 8146 8147 partition, order = self._parse_partition_and_order() 8148 kind = ( 8149 self._match_set((TokenType.ROWS, TokenType.RANGE)) or self._match_text_seq("GROUPS") 8150 ) and self._prev.text 8151 8152 if kind: 8153 self._match(TokenType.BETWEEN) 8154 start = self._parse_window_spec() 8155 8156 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 8157 exclude = ( 8158 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 8159 if self._match_text_seq("EXCLUDE") 8160 else None 8161 ) 8162 8163 spec = self.expression( 8164 exp.WindowSpec( 8165 kind=kind, 8166 start=start["value"], 8167 start_side=start["side"], 8168 end=end.get("value"), 8169 end_side=end.get("side"), 8170 exclude=exclude, 8171 ) 8172 ) 8173 else: 8174 spec = None 8175 8176 self._match_r_paren() 8177 8178 window = self.expression( 8179 exp.Window( 8180 this=this, 8181 partition_by=partition, 8182 order=order, 8183 spec=spec, 8184 alias=window_alias, 8185 over=over, 8186 first=first, 8187 ), 8188 comments=comments, 8189 ) 8190 8191 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
8192 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 8193 return self._parse_window(window, alias=alias) 8194 8195 return window 8196 8197 def _parse_partition_and_order( 8198 self, 8199 ) -> tuple[list[exp.Expr], exp.Expr | None]: 8200 return self._parse_partition_by(), self._parse_order() 8201 8202 def _parse_window_spec(self) -> dict[str, str | exp.Expr | None]: 8203 self._match(TokenType.BETWEEN) 8204 8205 return { 8206 "value": ( 8207 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 8208 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 8209 or self._parse_bitwise() 8210 ), 8211 "side": self._prev.text if self._match_texts(self.WINDOW_SIDES) else None, 8212 } 8213 8214 def _parse_alias(self, this: exp.Expr | None, explicit: bool = False) -> exp.Expr | None: 8215 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 8216 # so this section tries to parse the clause version and if it fails, it treats the token 8217 # as an identifier (alias) 8218 if self._can_parse_limit_or_offset(): 8219 return this 8220 8221 any_token = self._match(TokenType.ALIAS) 8222 comments = self._prev_comments 8223 8224 if explicit and not any_token: 8225 return this 8226 8227 if self._match(TokenType.L_PAREN): 8228 aliases = self.expression( 8229 exp.Aliases( 8230 this=this, expressions=self._parse_csv(lambda: self._parse_id_var(any_token)) 8231 ), 8232 comments=comments, 8233 ) 8234 self._match_r_paren(aliases) 8235 return aliases 8236 8237 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 8238 self.STRING_ALIASES and self._parse_string_as_identifier() 8239 ) 8240 8241 if alias: 8242 comments.extend(alias.pop_comments()) 8243 this = self.expression(exp.Alias(this=this, alias=alias), comments=comments) 8244 column = this.this 8245 8246 # Moves the comment next to the alias in `expr /* comment */ AS alias` 8247 if not this.comments and column and column.comments: 8248 this.comments = 
column.pop_comments() 8249 8250 return this 8251 8252 def _parse_id_var( 8253 self, 8254 any_token: bool = True, 8255 tokens: t.Collection[TokenType] | None = None, 8256 ) -> exp.Expr | None: 8257 expression = self._parse_identifier() 8258 if not expression and ( 8259 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 8260 ): 8261 quoted = self._prev.token_type == TokenType.STRING 8262 expression = self._identifier_expression(quoted=quoted) 8263 8264 return expression 8265 8266 def _parse_string(self) -> exp.Expr | None: 8267 if self._match_set(self.STRING_PARSERS): 8268 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 8269 return self._parse_placeholder() 8270 8271 def _parse_string_as_identifier(self) -> exp.Identifier | None: 8272 if not self._match(TokenType.STRING): 8273 return None 8274 output = exp.to_identifier(self._prev.text, quoted=True) 8275 output.update_positions(self._prev) 8276 return output 8277 8278 def _parse_number(self) -> exp.Expr | None: 8279 if self._match_set(self.NUMERIC_PARSERS): 8280 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 8281 return self._parse_placeholder() 8282 8283 def _parse_identifier(self) -> exp.Expr | None: 8284 if self._match(TokenType.IDENTIFIER): 8285 return self._identifier_expression(quoted=True) 8286 return self._parse_placeholder() 8287 8288 def _parse_var( 8289 self, 8290 any_token: bool = False, 8291 tokens: t.Collection[TokenType] | None = None, 8292 upper: bool = False, 8293 ) -> exp.Expr | None: 8294 if ( 8295 (any_token and self._advance_any()) 8296 or self._match(TokenType.VAR) 8297 or (self._match_set(tokens) if tokens else False) 8298 ): 8299 return self.expression( 8300 exp.Var(this=self._prev.text.upper() if upper else self._prev.text) 8301 ) 8302 return self._parse_placeholder() 8303 8304 def _advance_any(self, ignore_reserved: bool = False) -> Token | None: 8305 if self._curr and (ignore_reserved or self._curr.token_type not in 
self.RESERVED_TOKENS): 8306 self._advance() 8307 return self._prev 8308 return None 8309 8310 def _parse_var_or_string(self, upper: bool = False) -> exp.Expr | None: 8311 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 8312 8313 def _parse_primary_or_var(self) -> exp.Expr | None: 8314 return self._parse_primary() or self._parse_var(any_token=True) 8315 8316 def _parse_null(self) -> exp.Expr | None: 8317 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 8318 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 8319 return self._parse_placeholder() 8320 8321 def _parse_boolean(self) -> exp.Expr | None: 8322 if self._match(TokenType.TRUE): 8323 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 8324 if self._match(TokenType.FALSE): 8325 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 8326 return self._parse_placeholder() 8327 8328 def _parse_star(self) -> exp.Expr | None: 8329 if self._match(TokenType.STAR): 8330 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 8331 return self._parse_placeholder() 8332 8333 def _parse_parameter(self) -> exp.Parameter: 8334 this = self._parse_identifier() or self._parse_primary_or_var() 8335 return self.expression(exp.Parameter(this=this)) 8336 8337 def _parse_placeholder(self) -> exp.Expr | None: 8338 if self._match_set(self.PLACEHOLDER_PARSERS): 8339 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 8340 if placeholder: 8341 return placeholder 8342 self._advance(-1) 8343 return None 8344 8345 def _parse_star_op(self, *keywords: str) -> list[exp.Expr] | None: 8346 if not self._match_texts(keywords): 8347 return None 8348 if self._match(TokenType.L_PAREN, advance=False): 8349 return self._parse_wrapped_csv(self._parse_expression) 8350 8351 expression = self._parse_alias(self._parse_disjunction(), explicit=True) 8352 return [expression] if expression else None 8353 8354 def _parse_csv( 8355 self, parse_method: t.Callable[[], T | None], 
sep: TokenType = TokenType.COMMA 8356 ) -> list[T]: 8357 parse_result = parse_method() 8358 items = [parse_result] if parse_result is not None else [] 8359 8360 while self._match(sep): 8361 if isinstance(parse_result, exp.Expr): 8362 self._add_comments(parse_result) 8363 parse_result = parse_method() 8364 if parse_result is not None: 8365 items.append(parse_result) 8366 8367 return items 8368 8369 def _parse_wrapped_id_vars(self, optional: bool = False) -> list[exp.Expr]: 8370 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 8371 8372 def _parse_wrapped_csv( 8373 self, 8374 parse_method: t.Callable[[], T | None], 8375 sep: TokenType = TokenType.COMMA, 8376 optional: bool = False, 8377 ) -> list[T]: 8378 return self._parse_wrapped( 8379 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 8380 ) 8381 8382 def _parse_wrapped(self, parse_method: t.Callable[[], T], optional: bool = False) -> T: 8383 wrapped = self._match(TokenType.L_PAREN) 8384 if not wrapped and not optional: 8385 self.raise_error("Expecting (") 8386 parse_result = parse_method() 8387 if wrapped: 8388 self._match_r_paren() 8389 return parse_result 8390 8391 def _parse_expressions(self) -> list[exp.Expr]: 8392 return self._parse_csv(self._parse_expression) 8393 8394 def _parse_select_or_expression(self, alias: bool = False) -> exp.Expr | None: 8395 return ( 8396 self._parse_set_operations( 8397 self._parse_alias(self._parse_assignment(), explicit=True) 8398 if alias 8399 else self._parse_assignment() 8400 ) 8401 or self._parse_select() 8402 ) 8403 8404 def _parse_ddl_select(self) -> exp.Expr | None: 8405 return self._parse_query_modifiers( 8406 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 8407 ) 8408 8409 def _parse_transaction(self) -> exp.Transaction | exp.Command: 8410 this = None 8411 if self._match_texts(self.TRANSACTION_KIND): 8412 this = self._prev.text 8413 8414 self._match_texts(("TRANSACTION", "WORK")) 8415 8416 
modes = [] 8417 while True: 8418 mode = [] 8419 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 8420 mode.append(self._prev.text) 8421 8422 if mode: 8423 modes.append(" ".join(mode)) 8424 if not self._match(TokenType.COMMA): 8425 break 8426 8427 return self.expression(exp.Transaction(this=this, modes=modes)) 8428 8429 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 8430 chain = None 8431 savepoint = None 8432 is_rollback = self._prev.token_type == TokenType.ROLLBACK 8433 8434 self._match_texts(("TRANSACTION", "WORK")) 8435 8436 if self._match_text_seq("TO"): 8437 self._match_text_seq("SAVEPOINT") 8438 savepoint = self._parse_id_var() 8439 8440 if self._match(TokenType.AND): 8441 chain = not self._match_text_seq("NO") 8442 self._match_text_seq("CHAIN") 8443 8444 if is_rollback: 8445 return self.expression(exp.Rollback(savepoint=savepoint)) 8446 8447 return self.expression(exp.Commit(chain=chain)) 8448 8449 def _parse_refresh(self) -> exp.Refresh | exp.Command: 8450 if self._match(TokenType.TABLE): 8451 kind = "TABLE" 8452 elif self._match_text_seq("MATERIALIZED", "VIEW"): 8453 kind = "MATERIALIZED VIEW" 8454 else: 8455 kind = "" 8456 8457 this = self._parse_string() or self._parse_table() 8458 if not kind and not isinstance(this, exp.Literal): 8459 return self._parse_as_command(self._prev) 8460 8461 return self.expression(exp.Refresh(this=this, kind=kind)) 8462 8463 def _parse_column_def_with_exists(self): 8464 start = self._index 8465 self._match(TokenType.COLUMN) 8466 8467 exists_column = self._parse_exists(not_=True) 8468 expression = self._parse_field_def() 8469 8470 if not isinstance(expression, exp.ColumnDef): 8471 self._retreat(start) 8472 return None 8473 8474 expression.set("exists", exists_column) 8475 8476 return expression 8477 8478 def _parse_add_column(self) -> exp.ColumnDef | None: 8479 if not self._prev.text.upper() == "ADD": 8480 return None 8481 8482 expression = self._parse_column_def_with_exists() 8483 if not 
expression:
            return None

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition(this=self._parse_column(), position=position)
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> exp.Drop | exp.Command | None:
        # Default the DROP kind to COLUMN when the statement didn't specify one.
        drop = self._parse_drop() if self._match(TokenType.DROP) else None
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: bool | None = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition(expressions=self._parse_csv(self._parse_partition), exists=exists)
        )

    def _parse_alter_table_add(self) -> list[exp.Expr]:
        """Parse the actions of an ALTER TABLE ... ADD statement."""

        def _parse_add_alteration() -> exp.Expr | None:
            # Parses a single "ADD <constraint | column | partition>" alteration.
            self._match_text_seq("ADD")
            if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
                return self.expression(
                    exp.AddConstraint(expressions=self._parse_csv(self._parse_constraint))
                )

            column_def = self._parse_add_column()
            if isinstance(column_def, exp.ColumnDef):
                return column_def

            exists = self._parse_exists(not_=True)
            if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
                return self.expression(
                    exp.AddPartition(
                        exists=exists,
                        this=self._parse_field(any_token=True),
                        location=self._match_text_seq("LOCATION", advance=False)
                        and self._parse_property(),
                    )
                )

            return None

        # Bare column list (no constraint keyword): parse either a wrapped schema
        # or a CSV of column definitions.
        if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
            not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
            or self._match_text_seq("COLUMNS")
        ):
            schema = self._parse_schema()

            return (
                ensure_list(schema)
                if schema
                else self._parse_csv(self._parse_column_def_with_exists)
            )

        return self._parse_csv(_parse_add_alteration)

    def _parse_alter_table_alter(self) -> exp.Expr | None:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn(this=column, drop=True))
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn(this=column, default=self._parse_disjunction()))
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn(this=column, comment=self._parse_string()))
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(exp.AlterColumn(this=column, drop=True, allow_null=True))
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(exp.AlterColumn(this=column, allow_null=False))

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn(this=column, visible="VISIBLE"))
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn(this=column, visible="INVISIBLE"))

        # Fallback: ALTER COLUMN ... [SET DATA] TYPE <type>
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn(
                this=column,
                dtype=self._parse_types(),
                collate=self._match(TokenType.COLLATE) and self._parse_term(),
                using=self._match(TokenType.USING) and self._parse_disjunction(),
            )
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle(this=exp.var(self._prev.text.upper())))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle(this=self._parse_column()))

    def _parse_alter_sortkey(self, compound: bool | None = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey(expressions=self._parse_wrapped_id_vars(), compound=compound)
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey(this=exp.var(self._prev.text.upper()), compound=compound)
        )

    def _parse_alter_table_drop(self) -> list[exp.Expr]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop: rewind and parse as a column drop instead.
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.AlterRename | exp.RenameColumn | None:
        if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN:
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or not to or new_column is None:
                return None

            return self.expression(exp.RenameColumn(this=old_column, to=new_column, exists=exists))

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename(this=self._parse_table(schema=True)))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse the many dialect-specific ALTER TABLE ... SET variants.

        NOTE(review): branch order matters — each `_match_text_seq` consumes tokens
        on success, so reordering these arms would change what gets parsed.
        """
        alter_set = self.expression(exp.AlterSet())

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            properties = self._parse_wrapped(self._parse_properties, optional=True)
            alter_set.set("expressions", [properties])

        return alter_set

    def _parse_alter_session(self) -> exp.AlterSession:
        """Parse ALTER SESSION SET/UNSET statements."""
        if self._match(TokenType.SET):
            expressions = self._parse_csv(lambda: self._parse_set_item_assignment())
            return self.expression(exp.AlterSession(expressions=expressions, unset=False))

        self._match_text_seq("UNSET")
        expressions = self._parse_csv(
            lambda:
self.expression(exp.SetItem(this=self._parse_id_var(any_token=True))) 8674 ) 8675 return self.expression(exp.AlterSession(expressions=expressions, unset=True)) 8676 8677 def _parse_alter(self) -> exp.Alter | exp.Command: 8678 start = self._prev 8679 8680 iceberg = self._match_text_seq("ICEBERG") 8681 8682 alter_token = self._match_set(self.ALTERABLES) and self._prev 8683 if not alter_token: 8684 return self._parse_as_command(start) 8685 if iceberg and alter_token.token_type != TokenType.TABLE: 8686 return self._parse_as_command(start) 8687 8688 exists = self._parse_exists() 8689 only = self._match_text_seq("ONLY") 8690 8691 if alter_token.token_type == TokenType.SESSION: 8692 this = None 8693 check = None 8694 cluster = None 8695 else: 8696 this = self._parse_table(schema=True, parse_partition=self.ALTER_TABLE_PARTITIONS) 8697 check = self._match_text_seq("WITH", "CHECK") 8698 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8699 8700 if self._next: 8701 self._advance() 8702 8703 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 8704 if parser: 8705 actions = ensure_list(parser(self)) 8706 not_valid = self._match_text_seq("NOT", "VALID") 8707 options = self._parse_csv(self._parse_property) 8708 cascade = self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE") 8709 8710 if not self._curr and actions: 8711 return self.expression( 8712 exp.Alter( 8713 this=this, 8714 kind=alter_token.text.upper(), 8715 exists=exists, 8716 actions=actions, 8717 only=only, 8718 options=options, 8719 cluster=cluster, 8720 not_valid=not_valid, 8721 check=check, 8722 cascade=cascade, 8723 iceberg=iceberg, 8724 ) 8725 ) 8726 8727 return self._parse_as_command(start) 8728 8729 def _parse_analyze(self) -> exp.Analyze | exp.Command: 8730 start = self._prev 8731 # https://duckdb.org/docs/sql/statements/analyze 8732 if not self._curr: 8733 return self.expression(exp.Analyze()) 8734 8735 options = [] 8736 while 
self._match_texts(self.ANALYZE_STYLES): 8737 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 8738 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 8739 else: 8740 options.append(self._prev.text.upper()) 8741 8742 this: exp.Expr | None = None 8743 inner_expression: exp.Expr | None = None 8744 8745 kind = self._curr.text.upper() if self._curr else None 8746 8747 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 8748 this = self._parse_table_parts() 8749 elif self._match_text_seq("TABLES"): 8750 if self._match_set((TokenType.FROM, TokenType.IN)): 8751 kind = f"{kind} {self._prev.text.upper()}" 8752 this = self._parse_table(schema=True, is_db_reference=True) 8753 elif self._match_text_seq("DATABASE"): 8754 this = self._parse_table(schema=True, is_db_reference=True) 8755 elif self._match_text_seq("CLUSTER"): 8756 this = self._parse_table() 8757 # Try matching inner expr keywords before fallback to parse table. 8758 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 8759 kind = None 8760 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 8761 else: 8762 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 8763 kind = None 8764 this = self._parse_table_parts() 8765 8766 partition = self._try_parse(self._parse_partition) 8767 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 8768 return self._parse_as_command(start) 8769 8770 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 8771 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 8772 "WITH", "ASYNC", "MODE" 8773 ): 8774 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 8775 else: 8776 mode = None 8777 8778 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 8779 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 8780 8781 properties = self._parse_properties() 8782 return self.expression( 8783 exp.Analyze( 8784 kind=kind, 
8785 this=this, 8786 mode=mode, 8787 partition=partition, 8788 properties=properties, 8789 expression=inner_expression, 8790 options=options, 8791 ) 8792 ) 8793 8794 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 8795 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 8796 this = None 8797 kind = self._prev.text.upper() 8798 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 8799 expressions = [] 8800 8801 if not self._match_text_seq("STATISTICS"): 8802 self.raise_error("Expecting token STATISTICS") 8803 8804 if self._match_text_seq("NOSCAN"): 8805 this = "NOSCAN" 8806 elif self._match(TokenType.FOR): 8807 if self._match_text_seq("ALL", "COLUMNS"): 8808 this = "FOR ALL COLUMNS" 8809 if self._match_texts("COLUMNS"): 8810 this = "FOR COLUMNS" 8811 expressions = self._parse_csv(self._parse_column_reference) 8812 elif self._match_text_seq("SAMPLE"): 8813 sample = self._parse_number() 8814 expressions = [ 8815 self.expression( 8816 exp.AnalyzeSample( 8817 sample=sample, 8818 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 8819 ) 8820 ) 8821 ] 8822 8823 return self.expression( 8824 exp.AnalyzeStatistics(kind=kind, option=option, this=this, expressions=expressions) 8825 ) 8826 8827 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 8828 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 8829 kind = None 8830 this = None 8831 expression: exp.Expr | None = None 8832 if self._match_text_seq("REF", "UPDATE"): 8833 kind = "REF" 8834 this = "UPDATE" 8835 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 8836 this = "UPDATE SET DANGLING TO NULL" 8837 elif self._match_text_seq("STRUCTURE"): 8838 kind = "STRUCTURE" 8839 if self._match_text_seq("CASCADE", "FAST"): 8840 this = "CASCADE FAST" 8841 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 8842 ("ONLINE", "OFFLINE") 8843 ): 8844 this = f"CASCADE COMPLETE 
{self._prev.text.upper()}" 8845 expression = self._parse_into() 8846 8847 return self.expression(exp.AnalyzeValidate(kind=kind, this=this, expression=expression)) 8848 8849 def _parse_analyze_columns(self) -> exp.AnalyzeColumns | None: 8850 this = self._prev.text.upper() 8851 if self._match_text_seq("COLUMNS"): 8852 return self.expression(exp.AnalyzeColumns(this=f"{this} {self._prev.text.upper()}")) 8853 return None 8854 8855 def _parse_analyze_delete(self) -> exp.AnalyzeDelete | None: 8856 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 8857 if self._match_text_seq("STATISTICS"): 8858 return self.expression(exp.AnalyzeDelete(kind=kind)) 8859 return None 8860 8861 def _parse_analyze_list(self) -> exp.AnalyzeListChainedRows | None: 8862 if self._match_text_seq("CHAINED", "ROWS"): 8863 return self.expression(exp.AnalyzeListChainedRows(expression=self._parse_into())) 8864 return None 8865 8866 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 8867 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 8868 this = self._prev.text.upper() 8869 expression: exp.Expr | None = None 8870 expressions = [] 8871 update_options = None 8872 8873 if self._match_text_seq("HISTOGRAM", "ON"): 8874 expressions = self._parse_csv(self._parse_column_reference) 8875 with_expressions = [] 8876 while self._match(TokenType.WITH): 8877 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 8878 if self._match_texts(("SYNC", "ASYNC")): 8879 if self._match_text_seq("MODE", advance=False): 8880 with_expressions.append(f"{self._prev.text.upper()} MODE") 8881 self._advance() 8882 else: 8883 buckets = self._parse_number() 8884 if self._match_text_seq("BUCKETS"): 8885 with_expressions.append(f"{buckets} BUCKETS") 8886 if with_expressions: 8887 expression = self.expression(exp.AnalyzeWith(expressions=with_expressions)) 8888 8889 if self._match_texts(("MANUAL", "AUTO")) and self._match( 8890 TokenType.UPDATE, advance=False 8891 
): 8892 update_options = self._prev.text.upper() 8893 self._advance() 8894 elif self._match_text_seq("USING", "DATA"): 8895 expression = self.expression(exp.UsingData(this=self._parse_string())) 8896 8897 return self.expression( 8898 exp.AnalyzeHistogram( 8899 this=this, 8900 expressions=expressions, 8901 expression=expression, 8902 update_options=update_options, 8903 ) 8904 ) 8905 8906 def _parse_merge(self) -> exp.Merge: 8907 self._match(TokenType.INTO) 8908 target = self._parse_table() 8909 8910 if target and self._match(TokenType.ALIAS, advance=False): 8911 target.set("alias", self._parse_table_alias()) 8912 8913 self._match(TokenType.USING) 8914 using = self._parse_table() 8915 8916 return self.expression( 8917 exp.Merge( 8918 this=target, 8919 using=using, 8920 on=self._match(TokenType.ON) and self._parse_disjunction(), 8921 using_cond=self._match(TokenType.USING) and self._parse_using_identifiers(), 8922 whens=self._parse_when_matched(), 8923 returning=self._parse_returning(), 8924 ) 8925 ) 8926 8927 def _parse_when_matched(self) -> exp.Whens: 8928 whens = [] 8929 8930 while self._match(TokenType.WHEN): 8931 matched = not self._match(TokenType.NOT) 8932 self._match_text_seq("MATCHED") 8933 source = ( 8934 False 8935 if self._match_text_seq("BY", "TARGET") 8936 else self._match_text_seq("BY", "SOURCE") 8937 ) 8938 condition = self._parse_disjunction() if self._match(TokenType.AND) else None 8939 8940 self._match(TokenType.THEN) 8941 8942 if self._match(TokenType.INSERT): 8943 this = self._parse_star() 8944 if this: 8945 then: exp.Expr | None = self.expression(exp.Insert(this=this)) 8946 else: 8947 then = self.expression( 8948 exp.Insert( 8949 this=exp.var("ROW") 8950 if self._match_text_seq("ROW") 8951 else self._parse_value(values=False), 8952 expression=self._match_text_seq("VALUES") and self._parse_value(), 8953 where=self._parse_where(), 8954 ) 8955 ) 8956 elif self._match(TokenType.UPDATE): 8957 expressions = self._parse_star() 8958 if expressions: 8959 
then = self.expression(exp.Update(expressions=expressions)) 8960 else: 8961 then = self.expression( 8962 exp.Update( 8963 expressions=self._match(TokenType.SET) 8964 and self._parse_csv(self._parse_equality), 8965 where=self._parse_where(), 8966 ) 8967 ) 8968 elif self._match(TokenType.DELETE): 8969 then = self.expression(exp.Var(this=self._prev.text)) 8970 else: 8971 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 8972 8973 whens.append( 8974 self.expression( 8975 exp.When(matched=matched, source=source, condition=condition, then=then) 8976 ) 8977 ) 8978 return self.expression(exp.Whens(expressions=whens)) 8979 8980 def _parse_show(self) -> exp.Expr | None: 8981 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 8982 if parser: 8983 return parser(self) 8984 return self._parse_as_command(self._prev) 8985 8986 def _parse_set_item_assignment(self, kind: str | None = None) -> exp.Expr | None: 8987 index = self._index 8988 8989 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 8990 return self._parse_set_transaction(global_=kind == "GLOBAL") 8991 8992 left = self._parse_primary() or self._parse_column() 8993 assignment_delimiter = self._match_texts(self.SET_ASSIGNMENT_DELIMITERS) 8994 8995 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 8996 self._retreat(index) 8997 return None 8998 8999 right = self._parse_statement() or self._parse_id_var() 9000 if isinstance(right, (exp.Column, exp.Identifier)): 9001 right = exp.var(right.name) 9002 9003 this = self.expression(exp.EQ(this=left, expression=right)) 9004 return self.expression(exp.SetItem(this=this, kind=kind)) 9005 9006 def _parse_set_transaction(self, global_: bool = False) -> exp.Expr: 9007 self._match_text_seq("TRANSACTION") 9008 characteristics = self._parse_csv( 9009 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 9010 ) 9011 return self.expression( 9012 exp.SetItem(expressions=characteristics, 
    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> exp.Var | None:
        """Match the current token plus optional continuation keywords against *options*.

        Args:
            options: Mapping of a leading keyword to the keyword sequences that may
                follow it. An empty sequence means the keyword is valid on its own.
            raise_unmatched: Whether to raise when nothing matches (otherwise the
                token position is restored and None is returned).

        Returns:
            A Var holding the full matched option text, or None.
        """
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            # A continuation may be a single keyword or a sequence of keywords.
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched. An unknown leading keyword (continuations is
            # None) or a known keyword whose required continuation is missing is an
            # error; an empty continuation list falls through and stands alone.
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)
break 9080 settings.append(self.expression(exp.DictSubProperty(this=key, value=value))) 9081 self._match(TokenType.R_PAREN) 9082 9083 self._match_r_paren() 9084 9085 return self.expression( 9086 exp.DictProperty(this=this, kind=kind.this if kind else None, settings=settings) 9087 ) 9088 9089 def _parse_dict_range(self, this: str) -> exp.DictRange: 9090 self._match_l_paren() 9091 has_min = self._match_text_seq("MIN") 9092 if has_min: 9093 min = self._parse_var() or self._parse_primary() 9094 self._match_text_seq("MAX") 9095 max = self._parse_var() or self._parse_primary() 9096 else: 9097 max = self._parse_var() or self._parse_primary() 9098 min = exp.Literal.number(0) 9099 self._match_r_paren() 9100 return self.expression(exp.DictRange(this=this, min=min, max=max)) 9101 9102 def _parse_comprehension(self, this: exp.Expr | None) -> exp.Comprehension | None: 9103 index = self._index 9104 expression = self._parse_column() 9105 position = self._match(TokenType.COMMA) and self._parse_column() 9106 9107 if not self._match(TokenType.IN): 9108 self._retreat(index - 1) 9109 return None 9110 iterator = self._parse_column() 9111 condition = self._parse_disjunction() if self._match_text_seq("IF") else None 9112 return self.expression( 9113 exp.Comprehension( 9114 this=this, 9115 expression=expression, 9116 position=position, 9117 iterator=iterator, 9118 condition=condition, 9119 ) 9120 ) 9121 9122 def _parse_heredoc(self) -> exp.Heredoc | None: 9123 if self._match(TokenType.HEREDOC_STRING): 9124 return self.expression(exp.Heredoc(this=self._prev.text)) 9125 9126 if not self._match_text_seq("$"): 9127 return None 9128 9129 tags = ["$"] 9130 tag_text = None 9131 9132 if self._is_connected(): 9133 self._advance() 9134 tags.append(self._prev.text.upper()) 9135 else: 9136 self.raise_error("No closing $ found") 9137 9138 if tags[-1] != "$": 9139 if self._is_connected() and self._match_text_seq("$"): 9140 tag_text = tags[-1] 9141 tags.append("$") 9142 else: 9143 self.raise_error("No 
closing $ found") 9144 9145 heredoc_start = self._curr 9146 9147 while self._curr: 9148 if self._match_text_seq(*tags, advance=False): 9149 this = self._find_sql(heredoc_start, self._prev) 9150 self._advance(len(tags)) 9151 return self.expression(exp.Heredoc(this=this, tag=tag_text)) 9152 9153 self._advance() 9154 9155 self.raise_error(f"No closing {''.join(tags)} found") 9156 return None 9157 9158 def _find_parser(self, parsers: dict[str, t.Callable], trie: dict) -> t.Callable | None: 9159 if not self._curr: 9160 return None 9161 9162 index = self._index 9163 this = [] 9164 while True: 9165 # The current token might be multiple words 9166 curr = self._curr.text.upper() 9167 key = curr.split(" ") 9168 this.append(curr) 9169 9170 self._advance() 9171 result, trie = in_trie(trie, key) 9172 if result == TrieResult.FAILED: 9173 break 9174 9175 if result == TrieResult.EXISTS: 9176 subparser = parsers[" ".join(this)] 9177 return subparser 9178 9179 self._retreat(index) 9180 return None 9181 9182 def _match_l_paren(self, expression: exp.Expr | None = None) -> None: 9183 if not self._match(TokenType.L_PAREN, expression=expression): 9184 self.raise_error("Expecting (") 9185 9186 def _match_r_paren(self, expression: exp.Expr | None = None) -> None: 9187 if not self._match(TokenType.R_PAREN, expression=expression): 9188 self.raise_error("Expecting )") 9189 9190 def _replace_lambda( 9191 self, node: exp.Expr | None, expressions: list[exp.Expr] 9192 ) -> exp.Expr | None: 9193 if not node: 9194 return node 9195 9196 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 9197 9198 for column in node.find_all(exp.Column): 9199 typ = lambda_types.get(column.parts[0].name) 9200 if typ is not None: 9201 dot_or_id = column.to_dot() if column.table else column.this 9202 9203 if typ: 9204 dot_or_id = self.expression(exp.Cast(this=dot_or_id, to=typ)) 9205 9206 parent = column.parent 9207 9208 while isinstance(parent, exp.Dot): 9209 if not isinstance(parent.parent, 
    def _parse_truncate_table(self) -> exp.TruncateTable | None | exp.Expr:
        """Parse TRUNCATE [TABLE|DATABASE] with its optional trailing clauses.

        Disambiguates against the TRUNCATE(number, decimals) function, and falls
        back to a raw Command when leftover tokens cannot be consumed.
        """
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        # Optional RESTART/CONTINUE IDENTITY clause.
        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        # Optional CASCADE/RESTRICT option.
        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable(
                expressions=expressions,
                is_database=is_database,
                exists=exists,
                cluster=cluster,
                identity=identity,
                option=option,
                partition=partition,
            )
        )
    def _parse_copy_parameters(self) -> list[exp.CopyParameter]:
        """Parse the option/value pairs of a COPY statement's parameter list."""
        # Some dialects comma-separate parameters; others use whitespace only.
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter(this=option))

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            elif (
                prev == "FORMAT"
                and self._prev.token_type == TokenType.ALIAS
                and self._match_texts(("AVRO", "JSON"))
            ):
                # FORMAT AS AVRO/JSON case — fold the format kind into the name.
                param.set("this", exp.var(f"FORMAT AS {self._prev.text.upper()}"))
                param.set("expression", self._parse_field())
            else:
                # Plain scalar option value.
                param.set("expression", self._parse_unquoted_field() or self._parse_bracket())

            options.append(param)

            if sep:
                self._match(sep)

        return options
    def _parse_file_location(self) -> exp.Expr | None:
        """Parse a single file location argument of a COPY statement."""
        return self._parse_field()
9385 self._advance(-1) 9386 files = [] 9387 9388 credentials = self._parse_credentials() 9389 9390 self._match_text_seq("WITH") 9391 9392 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 9393 9394 # Fallback case 9395 if self._curr: 9396 return self._parse_as_command(start) 9397 9398 return self.expression( 9399 exp.Copy(this=this, kind=kind, credentials=credentials, files=files, params=params) 9400 ) 9401 9402 def _parse_normalize(self) -> exp.Normalize: 9403 return self.expression( 9404 exp.Normalize( 9405 this=self._parse_bitwise(), form=self._match(TokenType.COMMA) and self._parse_var() 9406 ) 9407 ) 9408 9409 def _parse_ceil_floor(self, expr_type: type[TCeilFloor]) -> TCeilFloor: 9410 args = self._parse_csv(lambda: self._parse_lambda()) 9411 9412 this = seq_get(args, 0) 9413 decimals = seq_get(args, 1) 9414 9415 return expr_type( 9416 this=this, 9417 decimals=decimals, 9418 to=self._parse_var() if self._match_text_seq("TO") else None, 9419 ) 9420 9421 def _parse_star_ops(self) -> exp.Expr | None: 9422 star_token = self._prev 9423 9424 if self._match_text_seq("COLUMNS", "(", advance=False): 9425 this = self._parse_function() 9426 if isinstance(this, exp.Columns): 9427 this.set("unpack", True) 9428 return this 9429 9430 return self.expression( 9431 exp.Star( 9432 except_=self._parse_star_op("EXCEPT", "EXCLUDE"), 9433 replace=self._parse_star_op("REPLACE"), 9434 rename=self._parse_star_op("RENAME"), 9435 ) 9436 ).update_positions(star_token) 9437 9438 def _parse_grant_privilege(self) -> exp.GrantPrivilege | None: 9439 privilege_parts = [] 9440 9441 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 9442 # (end of privilege list) or L_PAREN (start of column list) are met 9443 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 9444 privilege_parts.append(self._curr.text.upper()) 9445 self._advance() 9446 9447 this = exp.var(" ".join(privilege_parts)) 9448 expressions = ( 9449 
self._parse_wrapped_csv(self._parse_column) 9450 if self._match(TokenType.L_PAREN, advance=False) 9451 else None 9452 ) 9453 9454 return self.expression(exp.GrantPrivilege(this=this, expressions=expressions)) 9455 9456 def _parse_grant_principal(self) -> exp.GrantPrincipal | None: 9457 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 9458 principal = self._parse_id_var() 9459 9460 if not principal: 9461 return None 9462 9463 return self.expression(exp.GrantPrincipal(this=principal, kind=kind)) 9464 9465 def _parse_grant_revoke_common( 9466 self, 9467 ) -> tuple[list | None, str | None, exp.Expr | None]: 9468 privileges = self._parse_csv(self._parse_grant_privilege) 9469 9470 self._match(TokenType.ON) 9471 kind = self._prev.text.upper() if self._match_set(self.CREATABLES) else None 9472 9473 # Attempt to parse the securable e.g. MySQL allows names 9474 # such as "foo.*", "*.*" which are not easily parseable yet 9475 securable = self._try_parse(self._parse_table_parts) 9476 9477 return privileges, kind, securable 9478 9479 def _parse_grant(self) -> exp.Grant | exp.Command: 9480 start = self._prev 9481 9482 privileges, kind, securable = self._parse_grant_revoke_common() 9483 9484 if not securable or not self._match_text_seq("TO"): 9485 return self._parse_as_command(start) 9486 9487 principals = self._parse_csv(self._parse_grant_principal) 9488 9489 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 9490 9491 if self._curr: 9492 return self._parse_as_command(start) 9493 9494 return self.expression( 9495 exp.Grant( 9496 privileges=privileges, 9497 kind=kind, 9498 securable=securable, 9499 principals=principals, 9500 grant_option=grant_option, 9501 ) 9502 ) 9503 9504 def _parse_revoke(self) -> exp.Revoke | exp.Command: 9505 start = self._prev 9506 9507 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 9508 9509 privileges, kind, securable = self._parse_grant_revoke_common() 9510 9511 if not securable or not 
self._match_text_seq("FROM"): 9512 return self._parse_as_command(start) 9513 9514 principals = self._parse_csv(self._parse_grant_principal) 9515 9516 cascade = None 9517 if self._match_texts(("CASCADE", "RESTRICT")): 9518 cascade = self._prev.text.upper() 9519 9520 if self._curr: 9521 return self._parse_as_command(start) 9522 9523 return self.expression( 9524 exp.Revoke( 9525 privileges=privileges, 9526 kind=kind, 9527 securable=securable, 9528 principals=principals, 9529 grant_option=grant_option, 9530 cascade=cascade, 9531 ) 9532 ) 9533 9534 def _parse_overlay(self) -> exp.Overlay: 9535 def _parse_overlay_arg(text: str) -> exp.Expr | None: 9536 return ( 9537 self._parse_bitwise() 9538 if self._match(TokenType.COMMA) or self._match_text_seq(text) 9539 else None 9540 ) 9541 9542 return self.expression( 9543 exp.Overlay( 9544 this=self._parse_bitwise(), 9545 expression=_parse_overlay_arg("PLACING"), 9546 from_=_parse_overlay_arg("FROM"), 9547 for_=_parse_overlay_arg("FOR"), 9548 ) 9549 ) 9550 9551 def _parse_format_name(self) -> exp.Property: 9552 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 9553 # for FILE_FORMAT = <format_name> 9554 return self.expression( 9555 exp.Property( 9556 this=exp.var("FORMAT_NAME"), value=self._parse_string() or self._parse_table_parts() 9557 ) 9558 ) 9559 9560 def _parse_max_min_by(self, expr_type: type[exp.AggFunc]) -> exp.AggFunc: 9561 args: list[exp.Expr] = [] 9562 9563 if self._match(TokenType.DISTINCT): 9564 args.append(self.expression(exp.Distinct(expressions=[self._parse_lambda()]))) 9565 self._match(TokenType.COMMA) 9566 9567 args.extend(self._parse_function_args()) 9568 9569 return self.expression( 9570 expr_type(this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)) 9571 ) 9572 9573 def _identifier_expression( 9574 self, token: Token | None = None, quoted: bool | None = None 9575 ) -> exp.Identifier: 9576 token = token or self._prev 9577 return 
    def _build_pipe_cte(
        self,
        query: exp.Query,
        expressions: list[exp.Expr],
        alias_cte: exp.TableAlias | None = None,
    ) -> exp.Select:
        """Wrap *query* in a CTE and return a fresh SELECT that reads from it.

        Used by the pipe-syntax parsers: each pipe stage becomes a CTE that the
        next stage selects from.

        Args:
            query: The query to hide behind a CTE.
            expressions: Projections for the new outer SELECT.
            alias_cte: Optional explicit alias; otherwise a unique __tmp{n} name
                is generated from the per-parse counter.
        """
        new_cte: str | exp.TableAlias | None
        if alias_cte:
            new_cte = alias_cte
        else:
            self._pipe_cte_counter += 1
            new_cte = f"__tmp{self._pipe_cte_counter}"

        # Lift any WITH clause off the inner query onto the new outer SELECT so
        # existing CTEs stay at the top level.
        with_ = query.args.get("with_")
        ctes = with_.pop() if with_ else None

        new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False)
        if ctes:
            new_select.set("with_", ctes)

        return new_select.with_(new_cte, as_=query, copy=False)
    def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select:
        """Parse a pipe-syntax AGGREGATE stage, including its optional GROUP BY part."""
        self._match_text_seq("AGGREGATE")
        # Expressions right after AGGREGATE are aggregates only (no grouping yet).
        query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False)

        # A following GROUP BY — or the GROUP AND ORDER BY spelling — introduces
        # the grouping expressions. Matching order matters: both matches advance
        # the token cursor.
        if self._match(TokenType.GROUP_BY) or (
            self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY)
        ):
            query = self._parse_pipe_syntax_aggregate_group_order_by(query)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])
expressions=[exp.Star()]) 9692 with_ = query.args.get("with_") 9693 ctes = with_.pop() if with_ else None 9694 9695 if isinstance(first_setop, exp.Union): 9696 query = query.union(*setops, copy=False, **first_setop.args) 9697 elif isinstance(first_setop, exp.Except): 9698 query = query.except_(*setops, copy=False, **first_setop.args) 9699 else: 9700 query = query.intersect(*setops, copy=False, **first_setop.args) 9701 9702 query.set("with_", ctes) 9703 9704 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9705 9706 def _parse_pipe_syntax_join(self, query: exp.Query) -> exp.Query | None: 9707 join = self._parse_join() 9708 if not join: 9709 return None 9710 9711 if isinstance(query, exp.Select): 9712 return query.join(join, copy=False) 9713 9714 return query 9715 9716 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 9717 pivots = self._parse_pivots() 9718 if not pivots: 9719 return query 9720 9721 from_ = query.args.get("from_") 9722 if from_: 9723 from_.this.set("pivots", pivots) 9724 9725 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9726 9727 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 9728 self._match_text_seq("EXTEND") 9729 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 9730 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9731 9732 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 9733 sample = self._parse_table_sample() 9734 9735 with_ = query.args.get("with_") 9736 if with_: 9737 with_.expressions[-1].this.set("sample", sample) 9738 else: 9739 query.set("sample", sample) 9740 9741 return query 9742 9743 def _parse_pipe_syntax_query(self, query: exp.Query) -> exp.Query | None: 9744 if isinstance(query, exp.Subquery): 9745 query = exp.select("*").from_(query, copy=False) 9746 9747 if not query.args.get("from_"): 9748 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 9749 9750 
        # NOTE(review): this loop is the tail of a pipe-syntax ("|>") parsing
        # method whose definition begins above this chunk.
        while self._match(TokenType.PIPE_GT):
            start_index = self._index
            start_text = self._curr.text.upper()
            parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(start_text)
            if not parser:
                # The set operators (UNION, etc) and the JOIN operator have a few common starting
                # keywords, making it tricky to disambiguate them without lookahead. The approach
                # here is to try and parse a set operation and if that fails, then try to parse a
                # join operator. If that fails as well, then the operator is not supported.
                parsed_query = self._parse_pipe_syntax_set_operator(query)
                parsed_query = parsed_query or self._parse_pipe_syntax_join(query)
                if not parsed_query:
                    self._retreat(start_index)
                    self.raise_error(f"Unsupported pipe syntax operator: '{start_text}'.")
                    break
                query = parsed_query
            else:
                query = parser(self, query)

        return query

    def _parse_declareitem(self) -> exp.DeclareItem | None:
        """
        Parse one item of a DECLARE statement (identifier list, optional kind,
        optional default), returning None when no identifiers could be parsed.
        """
        # Some dialects prefix each declared item with VAR/VARIABLE; consume it if present.
        self._match_texts(("VAR", "VARIABLE"))

        vars = self._parse_csv(self._parse_id_var)
        if not vars:
            return None

        self._match(TokenType.ALIAS)
        # A TABLE variable carries a schema; otherwise parse a plain type.
        kind = self._parse_schema() if self._match(TokenType.TABLE) else self._parse_types()
        # The default value may be introduced by either DEFAULT or '='.
        default = (
            self._match(TokenType.DEFAULT) or self._match(TokenType.EQ)
        ) and self._parse_bitwise()

        return self.expression(exp.DeclareItem(this=vars, kind=kind, default=default))

    def _parse_declare(self) -> exp.Declare | exp.Command:
        """
        Parse a DECLARE statement, falling back to a generic exp.Command when
        its items cannot be fully parsed.
        """
        start = self._prev
        replace = self._match_text_seq("OR", "REPLACE")
        expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem))

        # Bail out to a raw command if nothing parsed or tokens remain unconsumed.
        if not expressions or self._curr:
            return self._parse_as_command(start)

        return self.expression(exp.Declare(expressions=expressions, replace=replace))

    def build_cast(self, strict: bool, **kwargs) -> exp.Cast:
        """Build an exp.Cast (strict) or exp.TryCast, forwarding **kwargs to it."""
        exp_class = exp.Cast if strict else exp.TryCast

        if exp_class == exp.TryCast:
            # Whether TRY_CAST only accepts string operands varies by dialect.
            kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING

        return self.expression(exp_class(**kwargs))

    def _parse_json_value(self) -> exp.JSONValue:
        """Parse the argument list of a JSON_VALUE(...) call into exp.JSONValue."""
        this = self._parse_bitwise()
        self._match(TokenType.COMMA)
        path = self._parse_bitwise()

        # Optional RETURNING <type> clause.
        returning = self._match(TokenType.RETURNING) and self._parse_type()

        return self.expression(
            exp.JSONValue(
                this=this,
                path=self.dialect.to_json_path(path),
                returning=returning,
                on_condition=self._parse_on_condition(),
            )
        )

    def _parse_group_concat(self) -> exp.Expr | None:
        """Parse GROUP_CONCAT arguments, including ORDER BY and SEPARATOR clauses."""

        def concat_exprs(node: exp.Expr | None, exprs: list[exp.Expr]) -> exp.Expr:
            # Multiple DISTINCT expressions are folded into a single Concat so
            # that distinctness applies to the concatenated value.
            if isinstance(node, exp.Distinct) and len(node.expressions) > 1:
                concat_exprs = [
                    self.expression(
                        exp.Concat(
                            expressions=node.expressions,
                            safe=True,
                            coalesce=self.dialect.CONCAT_COALESCE,
                        )
                    )
                ]
                node.set("expressions", concat_exprs)
                return node
            if len(exprs) == 1:
                return exprs[0]
            return self.expression(
                exp.Concat(expressions=args, safe=True, coalesce=self.dialect.CONCAT_COALESCE)
            )

        args = self._parse_csv(self._parse_lambda)

        if args:
            order = args[-1] if isinstance(args[-1], exp.Order) else None

            if order:
                # Order By is the last (or only) expression in the list and has consumed the 'expr' before it,
                # remove 'expr' from exp.Order and add it back to args
                args[-1] = order.this
                order.set("this", concat_exprs(order.this, args))

            this = order or concat_exprs(args[0], args)
        else:
            this = None

        separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None

        return self.expression(exp.GroupConcat(this=this, separator=separator))

    def _parse_initcap(self) -> exp.Initcap:
        """Parse INITCAP, filling in the dialect's default delimiter characters."""
        expr = exp.Initcap.from_arg_list(self._parse_function_args())

        # attach dialect's default delimiters
        if expr.args.get("expression") is None:
            expr.set("expression", exp.Literal.string(self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS))

        return expr

    def _parse_operator(self, this: exp.Expr | None) -> exp.Expr | None:
        """
        Parse one or more OPERATOR(...) applications, left-folding each onto
        *this*. The operator text is collected verbatim up to the closing paren.
        """
        while True:
            if not self._match(TokenType.L_PAREN):
                break

            # Accumulate the raw operator text token by token.
            op = ""
            while self._curr and not self._match(TokenType.R_PAREN):
                op += self._curr.text
                self._advance()

            comments = self._prev_comments
            this = self.expression(
                exp.Operator(this=this, operator=op, expression=self._parse_bitwise()),
                comments=comments,
            )

            # Keep folding only while another OPERATOR keyword follows.
            if not self._match(TokenType.OPERATOR):
                break

        return this
The Parser consumes a list of tokens produced by the Tokenizer and builds a parsed syntax tree from them.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- max_nodes: Maximum number of AST nodes allowed while parsing, used to guard against memory exhaustion. Set to -1 (the default) to disable the check.
    def __init__(
        self,
        error_level: ErrorLevel | None = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        max_nodes: int = -1,
        dialect: DialectType = None,
    ):
        """
        Initialize parser configuration and per-parse internal state.

        Args:
            error_level: The desired error level; defaults to ErrorLevel.IMMEDIATE.
            error_message_context: The amount of context (in characters) captured
                from the query string when displaying an error message.
            max_errors: Maximum number of error messages included in a raised
                ParseError (only relevant for ErrorLevel.RAISE).
            max_nodes: Maximum number of AST nodes; -1 disables the check.
            dialect: The dialect (or dialect identifier) to parse with.
        """
        self.error_level: ErrorLevel = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context: int = error_message_context
        self.max_errors: int = max_errors
        self.max_nodes: int = max_nodes
        self.dialect: t.Any = _resolve_dialect(dialect)

        # Mutable per-parse state; reset() restores all of the fields below.
        self.sql: str = ""
        self.errors: list[ParseError] = []
        self._tokens: list[Token] = []
        self._tokens_size: i64 = 0
        self._index: i64 = 0
        # Current/next/previous token cursors. SENTINEL_NONE is a falsy
        # stand-in for "no token" (see the `or` chain in raise_error).
        self._curr: Token = SENTINEL_NONE
        self._next: Token = SENTINEL_NONE
        self._prev: Token = SENTINEL_NONE
        self._prev_comments: list[str] = []
        # Counter used while parsing pipe syntax — presumably to name generated CTEs.
        self._pipe_cte_counter: int = 0
        self._chunks: list[list[Token]] = []
        self._chunk_index: i64 = 0
        # Incremented by validate_expression to enforce the max_nodes limit.
        self._node_count: int = 0
1855 def reset(self) -> None: 1856 self.sql = "" 1857 self.errors = [] 1858 self._tokens = [] 1859 self._tokens_size = 0 1860 self._index = 0 1861 self._curr = SENTINEL_NONE 1862 self._next = SENTINEL_NONE 1863 self._prev = SENTINEL_NONE 1864 self._prev_comments = [] 1865 self._pipe_cte_counter = 0 1866 self._chunks = [] 1867 self._chunk_index = 0 1868 self._node_count = 0
1958 def raise_error(self, message: str, token: Token = SENTINEL_NONE) -> None: 1959 token = token or self._curr or self._prev or Token.string("") 1960 formatted_sql, start_context, highlight, end_context = highlight_sql( 1961 sql=self.sql, 1962 positions=[(token.start, token.end)], 1963 context_length=self.error_message_context, 1964 ) 1965 formatted_message = f"{message}. Line {token.line}, Col: {token.col}.\n {formatted_sql}" 1966 1967 error = ParseError.new( 1968 formatted_message, 1969 description=message, 1970 line=token.line, 1971 col=token.col, 1972 start_context=start_context, 1973 highlight=highlight, 1974 end_context=end_context, 1975 ) 1976 1977 if self.error_level == ErrorLevel.IMMEDIATE: 1978 raise error 1979 1980 self.errors.append(error)
1982 def validate_expression(self, expression: E, args: list | None = None) -> E: 1983 if self.max_nodes > -1: 1984 self._node_count += 1 1985 if self._node_count > self.max_nodes: 1986 self.raise_error(f"Maximum number of AST nodes ({self.max_nodes}) exceeded") 1987 if self.error_level != ErrorLevel.IGNORE: 1988 for error_message in expression.error_messages(args): 1989 self.raise_error(error_message) 1990 return expression
2009 def parse(self, raw_tokens: list[Token], sql: str) -> list[exp.Expr | None]: 2010 """ 2011 Parses a list of tokens and returns a list of syntax trees, one tree 2012 per parsed SQL statement. 2013 2014 Args: 2015 raw_tokens: The list of tokens. 2016 sql: The original SQL string. 2017 2018 Returns: 2019 The list of the produced syntax trees. 2020 """ 2021 return self._parse( 2022 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 2023 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string.
Returns:
The list of the produced syntax trees.
2025 def parse_into( 2026 self, 2027 expression_types: exp.IntoType, 2028 raw_tokens: list[Token], 2029 sql: str | None = None, 2030 ) -> list[exp.Expr | None]: 2031 """ 2032 Parses a list of tokens into a given Expr type. If a collection of Expr 2033 types is given instead, this method will try to parse the token list into each one 2034 of them, stopping at the first for which the parsing succeeds. 2035 2036 Args: 2037 expression_types: The expression type(s) to try and parse the token list into. 2038 raw_tokens: The list of tokens. 2039 sql: The original SQL string, used to produce helpful debug messages. 2040 2041 Returns: 2042 The target Expr. 2043 """ 2044 errors = [] 2045 for expression_type in ensure_list(expression_types): 2046 parser = self.EXPRESSION_PARSERS.get(t.cast(type[exp.Expr], expression_type)) 2047 if not parser: 2048 raise TypeError(f"No parser registered for {expression_type}") 2049 2050 try: 2051 return self._parse(parser, raw_tokens, sql) 2052 except ParseError as e: 2053 e.errors[0]["into_expression"] = expression_type 2054 errors.append(e) 2055 2056 raise ParseError( 2057 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 2058 errors=merge_errors(errors), 2059 ) from errors[-1]
Parses a list of tokens into a given Expr type. If a collection of Expr types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expr.
2061 def check_errors(self) -> None: 2062 """Logs or raises any found errors, depending on the chosen error level setting.""" 2063 if self.error_level == ErrorLevel.WARN: 2064 for error in self.errors: 2065 logger.error(str(error)) 2066 elif self.error_level == ErrorLevel.RAISE and self.errors: 2067 raise ParseError( 2068 concat_messages(self.errors, self.max_errors), 2069 errors=merge_errors(self.errors), 2070 )
Logs or raises any found errors, depending on the chosen error level setting.
2072 def expression( 2073 self, 2074 instance: E, 2075 token: Token | None = None, 2076 comments: list[str] | None = None, 2077 ) -> E: 2078 if token: 2079 instance.update_positions(token) 2080 instance.add_comments(comments) if comments else self._add_comments(instance) 2081 if not instance.is_primitive: 2082 instance = self.validate_expression(instance) 2083 return instance
    def parse_set_operation(
        self, this: exp.Expr | None, consume_pipe: bool = False
    ) -> exp.Expr | None:
        """
        Parse a set operation (UNION / EXCEPT / INTERSECT) with *this* as its
        left-hand side, returning None (with the token cursor rewound) when the
        upcoming tokens do not form a set operation.
        """
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            # Not a set operation after all; undo any join parts we consumed.
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: bool | None = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            # Neither DISTINCT nor ALL was given: fall back to the dialect's
            # default; None means the dialect requires an explicit choice.
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = (
            self._match_text_seq("BY", "NAME")
            or self._match_text_seq("STRICT", "CORRESPONDING")
            or None
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation(
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
                side=side,
                kind=kind,
                on=on_column_list,
            ),
            comments=comments,
        )