sqlglot.parser
1from __future__ import annotations 2 3import itertools 4import logging 5import re 6import typing as t 7from collections import defaultdict 8 9from sqlglot import exp 10from sqlglot.errors import ( 11 ErrorLevel, 12 ParseError, 13 TokenError, 14 concat_messages, 15 highlight_sql, 16 merge_errors, 17) 18from sqlglot.expressions import apply_index_offset 19from sqlglot.helper import ensure_list, i64, seq_get 20from sqlglot.trie import new_trie 21from sqlglot.time import format_time 22from sqlglot.tokens import Token, Tokenizer, TokenType 23from sqlglot.trie import TrieResult, in_trie 24from collections.abc import Sequence 25from builtins import type as Type 26 27if t.TYPE_CHECKING: 28 from sqlglot.expressions import ExpOrStr 29 from sqlglot._typing import E, BuilderArgs 30 from sqlglot.dialects.dialect import Dialect, DialectType 31 32 from re import Pattern 33 34 T = t.TypeVar("T") 35 TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor) 36 37logger = logging.getLogger("sqlglot") 38 39OPTIONS_TYPE = dict[str, Sequence[t.Union[Sequence[str], str]]] 40 41# Used to detect alphabetical characters and +/- in timestamp literals 42TIME_ZONE_RE: Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]") 43 44 45def build_var_map(args: BuilderArgs) -> exp.StarMap | exp.VarMap: 46 if len(args) == 1 and args[0].is_star: 47 return exp.StarMap(this=args[0]) 48 49 keys: list[ExpOrStr] = [] 50 values: list[ExpOrStr] = [] 51 for i in range(0, len(args), 2): 52 keys.append(args[i]) 53 values.append(args[i + 1]) 54 55 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 56 57 58def build_like(args: BuilderArgs) -> exp.Escape | exp.Like: 59 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 60 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 61 62 63def binary_range_parser( 64 expr_type: Type[exp.Expr], reverse_args: bool = False 65) -> t.Callable[[Parser, exp.Expr | None], exp.Expr | None]: 66 def _parse_binary_range(self: Parser, this: exp.Expr | None) -> exp.Expr | None: 67 expression = self._parse_bitwise() 68 if reverse_args: 69 this, expression = expression, this 70 return self._parse_escape(self.expression(expr_type(this=this, expression=expression))) 71 72 return _parse_binary_range 73 74 75def build_logarithm(args: BuilderArgs, dialect: Dialect) -> exp.Func: 76 # Default argument order is base, expression 77 this = seq_get(args, 0) 78 expression = seq_get(args, 1) 79 80 if expression: 81 if not dialect.LOG_BASE_FIRST: 82 this, expression = expression, this 83 return exp.Log(this=this, expression=expression) 84 85 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 86 87 88def build_hex(args: BuilderArgs, dialect: Dialect) -> exp.Hex | exp.LowerHex: 89 arg = seq_get(args, 0) 90 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 91 92 93def build_lower(args: BuilderArgs) -> exp.Lower | exp.Hex: 94 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 95 arg = seq_get(args, 0) 96 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 97 98 99def build_upper(args: BuilderArgs) -> exp.Upper | exp.Hex: 100 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 101 arg = seq_get(args, 0) 102 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 103 104 105def build_extract_json_with_path( 106 expr_type: Type[E], 107) -> t.Callable[[BuilderArgs, Dialect], E]: 108 def _builder(args: BuilderArgs, dialect: Dialect) -> E: 109 expression = expr_type( 110 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 111 ) 112 if len(args) > 2 and expr_type is exp.JSONExtract: 113 expression.set("expressions", args[2:]) 114 if expr_type is exp.JSONExtractScalar: 115 expression.set("scalar_only", dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY) 116 117 return expression 118 119 return _builder 120 121 122def build_mod(args: BuilderArgs) -> exp.Mod: 123 this = seq_get(args, 0) 124 expression = seq_get(args, 1) 125 126 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 127 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 128 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 129 130 return exp.Mod(this=this, expression=expression) 131 132 133def build_pad(args: BuilderArgs, is_left: bool = True): 134 return exp.Pad( 135 this=seq_get(args, 0), 136 expression=seq_get(args, 1), 137 fill_pattern=seq_get(args, 2), 138 is_left=is_left, 139 ) 140 141 142def build_array_constructor( 143 exp_class: Type[E], args: list[t.Any], bracket_kind: TokenType, dialect: Dialect 144) -> exp.Expr: 145 array_exp = exp_class(expressions=args) 146 147 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 148 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 149 150 return array_exp 151 152 153def build_convert_timezone( 154 args: BuilderArgs, default_source_tz: str | None = None 155) -> exp.ConvertTimezone | exp.Anonymous: 156 if len(args) == 2: 157 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 158 return exp.ConvertTimezone( 159 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 160 ) 161 162 return exp.ConvertTimezone.from_arg_list(args) 163 164 165def build_trim(args: BuilderArgs, is_left: bool = True, reverse_args: bool = False) -> exp.Trim: 166 this, expression = seq_get(args, 0), seq_get(args, 1) 167 168 if expression and reverse_args: 169 this, expression = expression, this 170 171 return exp.Trim(this=this, expression=expression, position="LEADING" if is_left else "TRAILING") 172 173 174def build_coalesce( 175 args: BuilderArgs, is_nvl: bool | None = None, is_null: bool | None = None 176) -> exp.Coalesce: 177 return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null) 178 179 180def build_locate_strposition(args: BuilderArgs) -> exp.StrPosition: 181 return exp.StrPosition( 182 this=seq_get(args, 1), 183 substr=seq_get(args, 0), 184 position=seq_get(args, 2), 185 ) 186 187 188def build_array_append(args: BuilderArgs, dialect: Dialect) -> exp.ArrayAppend: 189 """ 190 Builds ArrayAppend with NULL propagation semantics based on the dialect configuration. 191 192 Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL. 193 Others (DuckDB, PostgreSQL) create a new single-element array instead. 194 195 Args: 196 args: Function arguments [array, element] 197 dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from 198 199 Returns: 200 ArrayAppend expression with appropriate null_propagation flag 201 """ 202 return exp.ArrayAppend( 203 this=seq_get(args, 0), 204 expression=seq_get(args, 1), 205 null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS, 206 ) 207 208 209def build_array_prepend(args: BuilderArgs, dialect: Dialect) -> exp.ArrayPrepend: 210 """ 211 Builds ArrayPrepend with NULL propagation semantics based on the dialect configuration. 212 213 Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL. 214 Others (DuckDB, PostgreSQL) create a new single-element array instead. 215 216 Args: 217 args: Function arguments [array, element] 218 dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from 219 220 Returns: 221 ArrayPrepend expression with appropriate null_propagation flag 222 """ 223 return exp.ArrayPrepend( 224 this=seq_get(args, 0), 225 expression=seq_get(args, 1), 226 null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS, 227 ) 228 229 230def build_array_concat(args: BuilderArgs, dialect: Dialect) -> exp.ArrayConcat: 231 """ 232 Builds ArrayConcat with NULL propagation semantics based on the dialect configuration. 233 234 Some dialects (Redshift, Snowflake) return NULL when any input array is NULL. 235 Others (DuckDB, PostgreSQL) skip NULL arrays and continue concatenation. 236 237 Args: 238 args: Function arguments [array1, array2, ...] (variadic) 239 dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from 240 241 Returns: 242 ArrayConcat expression with appropriate null_propagation flag 243 """ 244 return exp.ArrayConcat( 245 this=seq_get(args, 0), 246 expressions=args[1:], 247 null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS, 248 ) 249 250 251def build_array_remove(args: BuilderArgs, dialect: Dialect) -> exp.ArrayRemove: 252 """ 253 Builds ArrayRemove with NULL propagation semantics based on the dialect configuration. 254 255 Some dialects (Snowflake) return NULL when the removal value is NULL. 256 Others (DuckDB) may return empty array due to NULL comparison semantics. 257 258 Args: 259 args: Function arguments [array, value_to_remove] 260 dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from 261 262 Returns: 263 ArrayRemove expression with appropriate null_propagation flag 264 """ 265 return exp.ArrayRemove( 266 this=seq_get(args, 0), 267 expression=seq_get(args, 1), 268 null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS, 269 ) 270 271 272def _resolve_dialect(dialect: DialectType) -> Dialect: 273 from sqlglot.dialects.dialect import Dialect 274 275 return Dialect.get_or_raise(dialect) 276 277 278SENTINEL_NONE: Token = Token(TokenType.SENTINEL, "SENTINEL") 279 280 281class Parser: 282 """ 283 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 284 285 Args: 286 error_level: The desired error level. 287 Default: ErrorLevel.IMMEDIATE 288 error_message_context: The amount of context to capture from a query string when displaying 289 the error message (in number of characters). 290 Default: 100 291 max_errors: Maximum number of error messages to include in a raised ParseError. 292 This is only relevant if error_level is ErrorLevel.RAISE. 293 Default: 3 294 max_nodes: Maximum number of AST nodes to prevent memory exhaustion. 295 Set to -1 (default) to disable the check. 296 """ 297 298 __slots__ = ( 299 "error_level", 300 "error_message_context", 301 "max_errors", 302 "max_nodes", 303 "dialect", 304 "sql", 305 "errors", 306 "_tokens", 307 "_index", 308 "_curr", 309 "_next", 310 "_prev", 311 "_prev_comments", 312 "_pipe_cte_counter", 313 "_chunks", 314 "_chunk_index", 315 "_tokens_size", 316 "_node_count", 317 ) 318 319 FUNCTIONS: t.ClassVar[dict[str, t.Callable]] = { 320 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 321 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 322 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 323 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 324 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 325 ), 326 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 327 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 328 ), 329 "ARRAY_APPEND": build_array_append, 330 "ARRAY_CAT": build_array_concat, 331 "ARRAY_CONCAT": build_array_concat, 332 "ARRAY_INTERSECT": lambda args: exp.ArrayIntersect(expressions=args), 333 "ARRAY_INTERSECTION": lambda args: exp.ArrayIntersect(expressions=args), 334 "ARRAY_PREPEND": build_array_prepend, 335 "ARRAY_REMOVE": build_array_remove, 336 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 337 "CONCAT": lambda args, dialect: exp.Concat( 338 expressions=args, 339 safe=not dialect.STRICT_STRING_CONCAT, 340 coalesce=dialect.CONCAT_COALESCE, 341 ), 342 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 343 expressions=args, 344 safe=not dialect.STRICT_STRING_CONCAT, 345 coalesce=dialect.CONCAT_COALESCE, 346 ), 347 "CONVERT_TIMEZONE": build_convert_timezone, 348 "DATE_TO_DATE_STR": lambda args: exp.Cast( 349 this=seq_get(args, 0), 350 to=exp.DataType(this=exp.DType.TEXT), 351 ), 352 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 353 start=seq_get(args, 0), 354 end=seq_get(args, 1), 355 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 356 ), 357 "GENERATE_UUID": lambda args, dialect: exp.Uuid( 358 is_string=dialect.UUID_IS_STRING_TYPE or None 359 ), 360 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 361 "GREATEST": lambda args, dialect: exp.Greatest( 362 this=seq_get(args, 0), 363 expressions=args[1:], 364 ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS, 365 ), 366 "LEAST": lambda args, dialect: exp.Least( 367 this=seq_get(args, 0), 368 expressions=args[1:], 369 ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS, 370 ), 371 "HEX": build_hex, 372 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 373 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 374 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 375 "JSON_KEYS": lambda args, dialect: exp.JSONKeys( 376 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 377 ), 378 "LIKE": build_like, 379 "LOG": build_logarithm, 380 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 381 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 382 "LOWER": build_lower, 383 "LPAD": lambda args: build_pad(args), 384 "LEFTPAD": lambda args: build_pad(args), 385 "LTRIM": lambda args: build_trim(args), 386 "MOD": build_mod, 387 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 388 "RPAD": lambda args: build_pad(args, is_left=False), 389 "RTRIM": lambda args: build_trim(args, is_left=False), 390 "SCOPE_RESOLUTION": lambda args: ( 391 exp.ScopeResolution(expression=seq_get(args, 0)) 392 if len(args) != 2 393 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)) 394 ), 395 "STRPOS": exp.StrPosition.from_arg_list, 396 "CHARINDEX": lambda args: build_locate_strposition(args), 397 "INSTR": exp.StrPosition.from_arg_list, 398 "LOCATE": lambda args: build_locate_strposition(args), 399 "TIME_TO_TIME_STR": lambda args: exp.Cast( 400 this=seq_get(args, 0), 401 to=exp.DataType(this=exp.DType.TEXT), 402 ), 403 "TO_HEX": build_hex, 404 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 405 this=exp.Cast( 406 this=seq_get(args, 0), 407 to=exp.DataType(this=exp.DType.TEXT), 408 ), 409 start=exp.Literal.number(1), 410 length=exp.Literal.number(10), 411 ), 412 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 413 "UPPER": build_upper, 414 "UUID": lambda args, dialect: exp.Uuid(is_string=dialect.UUID_IS_STRING_TYPE or None), 415 "VAR_MAP": build_var_map, 416 } 417 418 NO_PAREN_FUNCTIONS: t.ClassVar[dict] = { 419 TokenType.CURRENT_DATE: exp.CurrentDate, 420 TokenType.CURRENT_DATETIME: exp.CurrentDate, 421 TokenType.CURRENT_TIME: exp.CurrentTime, 422 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 423 TokenType.CURRENT_USER: exp.CurrentUser, 424 TokenType.CURRENT_ROLE: exp.CurrentRole, 425 } 426 427 STRUCT_TYPE_TOKENS: t.ClassVar = { 428 TokenType.NESTED, 429 TokenType.OBJECT, 430 TokenType.STRUCT, 431 TokenType.UNION, 432 } 433 434 NESTED_TYPE_TOKENS: t.ClassVar = { 435 TokenType.ARRAY, 436 TokenType.LIST, 437 TokenType.LOWCARDINALITY, 438 TokenType.MAP, 439 TokenType.NULLABLE, 440 TokenType.RANGE, 441 *STRUCT_TYPE_TOKENS, 442 } 443 444 ENUM_TYPE_TOKENS: t.ClassVar = { 445 TokenType.DYNAMIC, 446 TokenType.ENUM, 447 TokenType.ENUM8, 448 TokenType.ENUM16, 449 } 450 451 AGGREGATE_TYPE_TOKENS: t.ClassVar = { 452 TokenType.AGGREGATEFUNCTION, 453 TokenType.SIMPLEAGGREGATEFUNCTION, 454 } 455 456 TYPE_TOKENS: t.ClassVar = { 457 TokenType.BIT, 458 TokenType.BOOLEAN, 459 TokenType.TINYINT, 460 TokenType.UTINYINT, 461 TokenType.SMALLINT, 462 TokenType.USMALLINT, 463 TokenType.INT, 464 TokenType.UINT, 465 TokenType.BIGINT, 466 TokenType.UBIGINT, 467 TokenType.BIGNUM, 468 TokenType.INT128, 469 TokenType.UINT128, 470 TokenType.INT256, 471 TokenType.UINT256, 472 TokenType.MEDIUMINT, 473 TokenType.UMEDIUMINT, 474 TokenType.FIXEDSTRING, 475 TokenType.FLOAT, 476 TokenType.DOUBLE, 477 TokenType.UDOUBLE, 478 TokenType.CHAR, 479 TokenType.NCHAR, 480 TokenType.VARCHAR, 481 TokenType.NVARCHAR, 482 TokenType.BPCHAR, 483 TokenType.TEXT, 484 TokenType.MEDIUMTEXT, 485 TokenType.LONGTEXT, 486 TokenType.BLOB, 487 TokenType.MEDIUMBLOB, 488 TokenType.LONGBLOB, 489 TokenType.BINARY, 490 TokenType.VARBINARY, 491 TokenType.JSON, 492 TokenType.JSONB, 493 TokenType.INTERVAL, 494 TokenType.TINYBLOB, 495 TokenType.TINYTEXT, 496 TokenType.TIME, 497 TokenType.TIMETZ, 498 TokenType.TIME_NS, 499 TokenType.TIMESTAMP, 500 TokenType.TIMESTAMP_S, 501 TokenType.TIMESTAMP_MS, 502 TokenType.TIMESTAMP_NS, 503 TokenType.TIMESTAMPTZ, 504 TokenType.TIMESTAMPLTZ, 505 TokenType.TIMESTAMPNTZ, 506 TokenType.DATETIME, 507 TokenType.DATETIME2, 508 TokenType.DATETIME64, 509 TokenType.SMALLDATETIME, 510 TokenType.DATE, 511 TokenType.DATE32, 512 TokenType.INT4RANGE, 513 TokenType.INT4MULTIRANGE, 514 TokenType.INT8RANGE, 515 TokenType.INT8MULTIRANGE, 516 TokenType.NUMRANGE, 517 TokenType.NUMMULTIRANGE, 518 TokenType.TSRANGE, 519 TokenType.TSMULTIRANGE, 520 TokenType.TSTZRANGE, 521 TokenType.TSTZMULTIRANGE, 522 TokenType.DATERANGE, 523 TokenType.DATEMULTIRANGE, 524 TokenType.DECIMAL, 525 TokenType.DECIMAL32, 526 TokenType.DECIMAL64, 527 TokenType.DECIMAL128, 528 TokenType.DECIMAL256, 529 TokenType.DECFLOAT, 530 TokenType.UDECIMAL, 531 TokenType.BIGDECIMAL, 532 TokenType.UUID, 533 TokenType.GEOGRAPHY, 534 TokenType.GEOGRAPHYPOINT, 535 TokenType.GEOMETRY, 536 TokenType.POINT, 537 TokenType.RING, 538 TokenType.LINESTRING, 539 TokenType.MULTILINESTRING, 540 TokenType.POLYGON, 541 TokenType.MULTIPOLYGON, 542 TokenType.HLLSKETCH, 543 TokenType.HSTORE, 544 TokenType.PSEUDO_TYPE, 545 TokenType.SUPER, 546 TokenType.SERIAL, 547 TokenType.SMALLSERIAL, 548 TokenType.BIGSERIAL, 549 TokenType.XML, 550 TokenType.YEAR, 551 TokenType.USERDEFINED, 552 TokenType.MONEY, 553 TokenType.SMALLMONEY, 554 TokenType.ROWVERSION, 555 TokenType.IMAGE, 556 TokenType.VARIANT, 557 TokenType.VECTOR, 558 TokenType.VOID, 559 TokenType.OBJECT, 560 TokenType.OBJECT_IDENTIFIER, 561 TokenType.INET, 562 TokenType.IPADDRESS, 563 TokenType.IPPREFIX, 564 TokenType.IPV4, 565 TokenType.IPV6, 566 TokenType.UNKNOWN, 567 TokenType.NOTHING, 568 TokenType.NULL, 569 TokenType.NAME, 570 TokenType.TDIGEST, 571 TokenType.DYNAMIC, 572 *ENUM_TYPE_TOKENS, 573 *NESTED_TYPE_TOKENS, 574 *AGGREGATE_TYPE_TOKENS, 575 } 576 577 SIGNED_TO_UNSIGNED_TYPE_TOKEN: t.ClassVar = { 578 TokenType.BIGINT: TokenType.UBIGINT, 579 TokenType.INT: TokenType.UINT, 580 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 581 TokenType.SMALLINT: TokenType.USMALLINT, 582 TokenType.TINYINT: TokenType.UTINYINT, 583 TokenType.DECIMAL: TokenType.UDECIMAL, 584 TokenType.DOUBLE: TokenType.UDOUBLE, 585 } 586 587 SUBQUERY_PREDICATES: t.ClassVar = { 588 TokenType.ANY: exp.Any, 589 TokenType.ALL: exp.All, 590 TokenType.EXISTS: exp.Exists, 591 TokenType.SOME: exp.Any, 592 } 593 594 SUBQUERY_TOKENS: t.ClassVar = { 595 TokenType.SELECT, 596 TokenType.WITH, 597 TokenType.FROM, 598 } 599 600 RESERVED_TOKENS: t.ClassVar = { 601 *Tokenizer.SINGLE_TOKENS.values(), 602 TokenType.SELECT, 603 } - {TokenType.IDENTIFIER} 604 605 DB_CREATABLES: t.ClassVar = { 606 TokenType.DATABASE, 607 TokenType.DICTIONARY, 608 TokenType.FILE_FORMAT, 609 TokenType.MODEL, 610 TokenType.NAMESPACE, 611 TokenType.SCHEMA, 612 TokenType.SEMANTIC_VIEW, 613 TokenType.SEQUENCE, 614 TokenType.SINK, 615 TokenType.SOURCE, 616 TokenType.STAGE, 617 TokenType.STORAGE_INTEGRATION, 618 TokenType.STREAMLIT, 619 TokenType.TABLE, 620 TokenType.TAG, 621 TokenType.VIEW, 622 TokenType.WAREHOUSE, 623 } 624 625 CREATABLES: t.ClassVar = { 626 TokenType.COLUMN, 627 TokenType.CONSTRAINT, 628 TokenType.FOREIGN_KEY, 629 TokenType.FUNCTION, 630 TokenType.INDEX, 631 TokenType.PROCEDURE, 632 TokenType.TRIGGER, 633 *DB_CREATABLES, 634 } 635 636 TRIGGER_EVENTS: t.ClassVar = { 637 TokenType.INSERT, 638 TokenType.UPDATE, 639 TokenType.DELETE, 640 TokenType.TRUNCATE, 641 } 642 643 ALTERABLES: t.ClassVar = { 644 TokenType.INDEX, 645 TokenType.TABLE, 646 TokenType.VIEW, 647 TokenType.SESSION, 648 } 649 650 # Tokens that can represent identifiers 651 ID_VAR_TOKENS: t.ClassVar[set] = { 652 TokenType.ALL, 653 TokenType.ANALYZE, 654 TokenType.ATTACH, 655 TokenType.VAR, 656 TokenType.ANTI, 657 TokenType.APPLY, 658 TokenType.ASC, 659 TokenType.ASOF, 660 TokenType.AUTO_INCREMENT, 661 TokenType.BEGIN, 662 TokenType.BPCHAR, 663 TokenType.CACHE, 664 TokenType.CASE, 665 TokenType.COLLATE, 666 TokenType.COMMAND, 667 TokenType.COMMENT, 668 TokenType.COMMIT, 669 TokenType.CONSTRAINT, 670 TokenType.COPY, 671 TokenType.CUBE, 672 TokenType.CURRENT_SCHEMA, 673 TokenType.DEFAULT, 674 TokenType.DELETE, 675 TokenType.DESC, 676 TokenType.DESCRIBE, 677 TokenType.DETACH, 678 TokenType.DICTIONARY, 679 TokenType.DIV, 680 TokenType.END, 681 TokenType.EXECUTE, 682 TokenType.EXPORT, 683 TokenType.ESCAPE, 684 TokenType.FALSE, 685 TokenType.FIRST, 686 TokenType.FILE, 687 TokenType.FILTER, 688 TokenType.FINAL, 689 TokenType.FORMAT, 690 TokenType.FULL, 691 TokenType.GET, 692 TokenType.IDENTIFIER, 693 TokenType.INOUT, 694 TokenType.IS, 695 TokenType.ISNULL, 696 TokenType.INTERVAL, 697 TokenType.KEEP, 698 TokenType.KILL, 699 TokenType.LEFT, 700 TokenType.LIMIT, 701 TokenType.LOAD, 702 TokenType.LOCK, 703 TokenType.MATCH, 704 TokenType.MERGE, 705 TokenType.NATURAL, 706 TokenType.NEXT, 707 TokenType.OFFSET, 708 TokenType.OPERATOR, 709 TokenType.ORDINALITY, 710 TokenType.OVER, 711 TokenType.OVERLAPS, 712 TokenType.OVERWRITE, 713 TokenType.PARTITION, 714 TokenType.PERCENT, 715 TokenType.PIVOT, 716 TokenType.PRAGMA, 717 TokenType.PUT, 718 TokenType.RANGE, 719 TokenType.RECURSIVE, 720 TokenType.REFERENCES, 721 TokenType.REFRESH, 722 TokenType.RENAME, 723 TokenType.REPLACE, 724 TokenType.RIGHT, 725 TokenType.ROLLUP, 726 TokenType.ROW, 727 TokenType.ROWS, 728 TokenType.SEMI, 729 TokenType.SET, 730 TokenType.SETTINGS, 731 TokenType.SHOW, 732 TokenType.STREAM, 733 TokenType.STREAMLIT, 734 TokenType.TEMPORARY, 735 TokenType.TOP, 736 TokenType.TRUE, 737 TokenType.TRUNCATE, 738 TokenType.UNIQUE, 739 TokenType.UNNEST, 740 TokenType.UNPIVOT, 741 TokenType.UPDATE, 742 TokenType.USE, 743 TokenType.VOLATILE, 744 TokenType.WINDOW, 745 TokenType.CURRENT_CATALOG, 746 TokenType.LOCALTIME, 747 TokenType.LOCALTIMESTAMP, 748 TokenType.SESSION_USER, 749 TokenType.STRAIGHT_JOIN, 750 *ALTERABLES, 751 *CREATABLES, 752 *SUBQUERY_PREDICATES, 753 *TYPE_TOKENS, 754 *NO_PAREN_FUNCTIONS, 755 } - {TokenType.UNION} 756 757 TABLE_ALIAS_TOKENS: t.ClassVar[set] = ID_VAR_TOKENS - { 758 TokenType.ANTI, 759 TokenType.ASOF, 760 TokenType.FULL, 761 TokenType.LEFT, 762 TokenType.LOCK, 763 TokenType.NATURAL, 764 TokenType.RIGHT, 765 TokenType.SEMI, 766 TokenType.WINDOW, 767 } 768 769 ALIAS_TOKENS: t.ClassVar = ID_VAR_TOKENS 770 771 COLON_PLACEHOLDER_TOKENS: t.ClassVar = ID_VAR_TOKENS 772 773 ARRAY_CONSTRUCTORS: t.ClassVar = { 774 "ARRAY": exp.Array, 775 "LIST": exp.List, 776 } 777 778 COMMENT_TABLE_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - {TokenType.IS} 779 780 UPDATE_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - {TokenType.SET} 781 782 TRIM_TYPES: t.ClassVar = {"LEADING", "TRAILING", "BOTH"} 783 784 # Tokens that indicate a simple column reference 785 IDENTIFIER_TOKENS: t.ClassVar[frozenset] = frozenset({TokenType.VAR, TokenType.IDENTIFIER}) 786 787 BRACKETS: t.ClassVar[frozenset] = frozenset({TokenType.L_BRACKET, TokenType.L_BRACE}) 788 789 # Postfix tokens that prevent the bare column fast path 790 COLUMN_POSTFIX_TOKENS: t.ClassVar[frozenset] = frozenset( 791 { 792 TokenType.L_PAREN, 793 TokenType.L_BRACKET, 794 TokenType.L_BRACE, 795 TokenType.COLON, 796 TokenType.JOIN_MARKER, 797 } 798 ) 799 800 TABLE_POSTFIX_TOKENS: t.ClassVar[frozenset] = frozenset( 801 { 802 TokenType.L_PAREN, 803 TokenType.L_BRACKET, 804 TokenType.L_BRACE, 805 TokenType.PIVOT, 806 TokenType.UNPIVOT, 807 TokenType.TABLE_SAMPLE, 808 } 809 ) 810 811 FUNC_TOKENS: t.ClassVar = { 812 TokenType.COLLATE, 813 TokenType.COMMAND, 814 TokenType.CURRENT_DATE, 815 TokenType.CURRENT_DATETIME, 816 TokenType.CURRENT_SCHEMA, 817 TokenType.CURRENT_TIMESTAMP, 818 TokenType.CURRENT_TIME, 819 TokenType.CURRENT_USER, 820 TokenType.CURRENT_CATALOG, 821 TokenType.FILTER, 822 TokenType.FIRST, 823 TokenType.FORMAT, 824 TokenType.GET, 825 TokenType.GLOB, 826 TokenType.IDENTIFIER, 827 TokenType.INDEX, 828 TokenType.ISNULL, 829 TokenType.ILIKE, 830 TokenType.INSERT, 831 TokenType.LIKE, 832 TokenType.LOCALTIME, 833 TokenType.LOCALTIMESTAMP, 834 TokenType.MERGE, 835 TokenType.NEXT, 836 TokenType.OFFSET, 837 TokenType.PRIMARY_KEY, 838 TokenType.RANGE, 839 TokenType.REPLACE, 840 TokenType.RLIKE, 841 TokenType.ROW, 842 TokenType.SESSION_USER, 843 TokenType.UNNEST, 844 TokenType.VAR, 845 TokenType.LEFT, 846 TokenType.RIGHT, 847 TokenType.SEQUENCE, 848 TokenType.DATE, 849 TokenType.DATETIME, 850 TokenType.TABLE, 851 TokenType.TIMESTAMP, 852 TokenType.TIMESTAMPTZ, 853 TokenType.TRUNCATE, 854 TokenType.UTC_DATE, 855 TokenType.UTC_TIME, 856 TokenType.UTC_TIMESTAMP, 857 TokenType.WINDOW, 858 TokenType.XOR, 859 *TYPE_TOKENS, 860 *SUBQUERY_PREDICATES, 861 } 862 863 CONJUNCTION: t.ClassVar[dict[TokenType, type[exp.Expr]]] = { 864 TokenType.AND: exp.And, 865 } 866 867 ASSIGNMENT: t.ClassVar[dict[TokenType, type[exp.Expr]]] = { 868 TokenType.COLON_EQ: exp.PropertyEQ, 869 } 870 871 DISJUNCTION: t.ClassVar[dict[TokenType, type[exp.Expr]]] = { 872 TokenType.OR: exp.Or, 873 } 874 875 EQUALITY: t.ClassVar = { 876 TokenType.EQ: exp.EQ, 877 TokenType.NEQ: exp.NEQ, 878 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 879 } 880 881 COMPARISON: t.ClassVar = { 882 TokenType.GT: exp.GT, 883 TokenType.GTE: exp.GTE, 884 TokenType.LT: exp.LT, 885 TokenType.LTE: exp.LTE, 886 } 887 888 BITWISE: t.ClassVar = { 889 TokenType.AMP: exp.BitwiseAnd, 890 TokenType.CARET: exp.BitwiseXor, 891 TokenType.PIPE: exp.BitwiseOr, 892 } 893 894 TERM: t.ClassVar = { 895 TokenType.DASH: exp.Sub, 896 TokenType.PLUS: exp.Add, 897 TokenType.MOD: exp.Mod, 898 TokenType.COLLATE: exp.Collate, 899 } 900 901 FACTOR: t.ClassVar = { 902 TokenType.DIV: exp.IntDiv, 903 TokenType.LR_ARROW: exp.Distance, 904 TokenType.SLASH: exp.Div, 905 TokenType.STAR: exp.Mul, 906 } 907 908 EXPONENT: t.ClassVar[dict[TokenType, type[exp.Expr]]] = {} 909 910 TIMES: t.ClassVar = { 911 TokenType.TIME, 912 TokenType.TIMETZ, 913 } 914 915 TIMESTAMPS: t.ClassVar = { 916 TokenType.TIMESTAMP, 917 TokenType.TIMESTAMPNTZ, 918 TokenType.TIMESTAMPTZ, 919 TokenType.TIMESTAMPLTZ, 920 *TIMES, 921 } 922 923 SET_OPERATIONS: t.ClassVar = { 924 TokenType.UNION, 925 TokenType.INTERSECT, 926 TokenType.EXCEPT, 927 } 928 929 JOIN_METHODS: t.ClassVar = { 930 TokenType.ASOF, 931 TokenType.NATURAL, 932 TokenType.POSITIONAL, 933 } 934 935 JOIN_SIDES: t.ClassVar = { 936 TokenType.LEFT, 937 TokenType.RIGHT, 938 TokenType.FULL, 939 } 940 941 JOIN_KINDS: t.ClassVar = { 942 TokenType.ANTI, 943 TokenType.CROSS, 944 TokenType.INNER, 945 TokenType.OUTER, 946 TokenType.SEMI, 947 TokenType.STRAIGHT_JOIN, 948 } 949 950 JOIN_HINTS: t.ClassVar[set[str]] = set() 951 952 # Tokens that unambiguously end a table reference on the fast path 953 TABLE_TERMINATORS: t.ClassVar[frozenset] = frozenset( 954 { 955 TokenType.COMMA, 956 TokenType.GROUP_BY, 957 TokenType.HAVING, 958 TokenType.JOIN, 959 TokenType.LIMIT, 960 TokenType.ON, 961 TokenType.ORDER_BY, 962 TokenType.R_PAREN, 963 TokenType.SEMICOLON, 964 TokenType.SENTINEL, 965 TokenType.WHERE, 966 *SET_OPERATIONS, 967 *JOIN_KINDS, 968 *JOIN_METHODS, 969 *JOIN_SIDES, 970 } 971 ) 972 973 LAMBDAS: t.ClassVar = { 974 TokenType.ARROW: lambda self, expressions: self.expression( 975 exp.Lambda( 976 this=self._replace_lambda( 977 self._parse_disjunction(), 978 expressions, 979 ), 980 expressions=expressions, 981 ) 982 ), 983 TokenType.FARROW: lambda self, expressions: self.expression( 984 exp.Kwarg(this=exp.var(expressions[0].name), expression=self._parse_disjunction()) 985 ), 986 } 987 988 # Whether lambda args include type annotations, e.g. TRANSFORM(arr, x INT -> x + 1) in Snowflake 989 TYPED_LAMBDA_ARGS: t.ClassVar[bool] = False 990 991 LAMBDA_ARG_TERMINATORS: t.ClassVar[frozenset] = frozenset({TokenType.COMMA, TokenType.R_PAREN}) 992 993 COLUMN_OPERATORS: t.ClassVar = { 994 TokenType.DOT: None, 995 TokenType.DOTCOLON: lambda self, this, to: self.expression(exp.JSONCast(this=this, to=to)), 996 TokenType.DCOLON: lambda self, this, to: self.build_cast( 997 strict=self.STRICT_CAST, this=this, to=to 998 ), 999 TokenType.ARROW: lambda self, this, path: self.expression( 1000 exp.JSONExtract( 1001 this=this, 1002 expression=self.dialect.to_json_path(path), 1003 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 1004 ) 1005 ), 1006 TokenType.DARROW: lambda self, this, path: self.expression( 1007 exp.JSONExtractScalar( 1008 this=this, 1009 expression=self.dialect.to_json_path(path), 1010 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 1011 scalar_only=self.dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY, 1012 ) 1013 ), 1014 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 1015 exp.JSONBExtract(this=this, expression=path) 1016 ), 1017 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 1018 exp.JSONBExtractScalar(this=this, expression=path) 1019 ), 1020 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 1021 exp.JSONBContains(this=this, expression=key) 1022 ), 1023 } 1024 1025 CAST_COLUMN_OPERATORS: t.ClassVar = { 1026 TokenType.DOTCOLON, 1027 TokenType.DCOLON, 1028 } 1029 1030 EXPRESSION_PARSERS: t.ClassVar = { 1031 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1032 exp.Column: lambda self: self._parse_column(), 1033 exp.ColumnDef: lambda self: self._parse_column_def(self._parse_column()), 1034 exp.Condition: lambda self: self._parse_disjunction(), 1035 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 1036 exp.Expr: lambda self: self._parse_expression(), 1037 exp.From: lambda self: self._parse_from(joins=True), 1038 exp.GrantPrincipal: lambda self: self._parse_grant_principal(), 1039 exp.GrantPrivilege: lambda self: self._parse_grant_privilege(), 1040 exp.Group: lambda self: self._parse_group(), 1041 exp.Having: lambda self: self._parse_having(), 1042 exp.Hint: lambda self: self._parse_hint_body(), 1043 exp.Identifier: lambda self: self._parse_id_var(), 1044 exp.Join: lambda self: self._parse_join(), 1045 exp.Lambda: lambda self: self._parse_lambda(), 1046 exp.Lateral: lambda self: self._parse_lateral(), 1047 exp.Limit: lambda self: self._parse_limit(), 1048 exp.Offset: lambda self: self._parse_offset(), 1049 exp.Order: lambda self: self._parse_order(), 1050 exp.Ordered: lambda self: self._parse_ordered(), 1051 exp.Properties: lambda self: self._parse_properties(), 1052 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 1053 exp.Qualify: lambda self: self._parse_qualify(), 1054 exp.Returning: lambda self: self._parse_returning(), 1055 exp.Select: lambda self: self._parse_select(), 1056 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 1057 exp.Table: lambda self: self._parse_table_parts(), 1058 exp.TableAlias: lambda self: self._parse_table_alias(), 1059 exp.Tuple: lambda self: self._parse_value(values=False), 1060 exp.Whens: lambda self: self._parse_when_matched(), 1061 exp.Where: lambda self: self._parse_where(), 1062 exp.Window: lambda self: self._parse_named_window(), 1063 exp.With: lambda self: self._parse_with(), 1064 } 1065 1066 STATEMENT_PARSERS: t.ClassVar = { 1067 TokenType.ALTER: lambda self: self._parse_alter(), 1068 TokenType.ANALYZE: lambda self: self._parse_analyze(), 1069 TokenType.BEGIN: lambda self: self._parse_transaction(), 1070 TokenType.CACHE: lambda self: self._parse_cache(), 1071 TokenType.COMMENT: lambda self: self._parse_comment(), 1072 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 1073 TokenType.COPY: lambda self: self._parse_copy(), 1074 TokenType.CREATE: lambda self: self._parse_create(), 1075 TokenType.DELETE: lambda self: self._parse_delete(), 1076 TokenType.DESC: lambda self: self._parse_describe(), 1077 TokenType.DESCRIBE: lambda self: self._parse_describe(), 1078 TokenType.DROP: lambda self: self._parse_drop(), 1079 TokenType.GRANT: lambda self: self._parse_grant(), 1080 TokenType.REVOKE: lambda self: self._parse_revoke(), 1081 TokenType.INSERT: lambda self: self._parse_insert(), 1082 TokenType.KILL: lambda self: self._parse_kill(), 1083 TokenType.LOAD: lambda self: self._parse_load(), 1084 TokenType.MERGE: lambda self: self._parse_merge(), 1085 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 1086 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma(this=self._parse_expression())), 1087 TokenType.REFRESH: lambda self: self._parse_refresh(), 1088 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 1089 TokenType.SET: lambda self: self._parse_set(), 1090 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 1091 TokenType.UNCACHE: lambda self: self._parse_uncache(), 1092 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 1093 TokenType.UPDATE: lambda self: self._parse_update(), 1094 TokenType.USE: lambda self: self._parse_use(), 1095 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 1096 } 1097 1098 UNARY_PARSERS: t.ClassVar = { 1099 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 1100 TokenType.NOT: lambda self: self.expression(exp.Not(this=self._parse_equality())), 1101 TokenType.TILDE: lambda self: self.expression(exp.BitwiseNot(this=self._parse_unary())), 1102 TokenType.DASH: lambda self: self.expression(exp.Neg(this=self._parse_unary())), 1103 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt(this=self._parse_unary())), 1104 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt(this=self._parse_unary())), 1105 } 1106 1107 STRING_PARSERS: t.ClassVar = { 1108 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 1109 exp.RawString(this=token.text), token 1110 ), 1111 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 1112 exp.National(this=token.text), token 1113 ), 1114 TokenType.RAW_STRING: lambda self, token: self.expression( 1115 exp.RawString(this=token.text), token 1116 ), 1117 TokenType.STRING: lambda self, token: self.expression( 1118 exp.Literal(this=token.text, is_string=True), token 1119 ), 1120 TokenType.UNICODE_STRING: lambda self, token: self.expression( 1121 exp.UnicodeString( 1122 this=token.text, escape=self._match_text_seq("UESCAPE") and self._parse_string() 1123 ), 1124 token, 1125 ), 1126 } 1127 1128 NUMERIC_PARSERS: t.ClassVar = { 1129 TokenType.BIT_STRING: lambda self, token: self.expression( 1130 exp.BitString(this=token.text), token 1131 ), 1132 TokenType.BYTE_STRING: lambda self, token: self.expression( 1133 exp.ByteString( 1134 this=token.text, is_bytes=self.dialect.BYTE_STRING_IS_BYTES_TYPE or None 1135 ), 1136 token, 1137 ), 1138 TokenType.HEX_STRING: lambda self, token: self.expression( 1139 exp.HexString( 1140 this=token.text, is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None 1141 ), 1142 token, 1143 ), 1144 TokenType.NUMBER: lambda self, token: self.expression( 1145 exp.Literal(this=token.text, is_string=False), token 1146 ), 1147 } 1148 1149 PRIMARY_PARSERS: t.ClassVar = { 1150 **STRING_PARSERS, 1151 **NUMERIC_PARSERS, 1152 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 1153 TokenType.NULL: lambda self, _: self.expression(exp.Null()), 1154 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean(this=True)), 1155 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean(this=False)), 1156 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 1157 TokenType.STAR: lambda self, _: self._parse_star_ops(), 1158 } 1159 1160 PLACEHOLDER_PARSERS: t.ClassVar = { 1161 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder()), 1162 TokenType.PARAMETER: lambda self: self._parse_parameter(), 1163 TokenType.COLON: lambda self: ( 1164 self.expression(exp.Placeholder(this=self._prev.text)) 1165 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 1166 else None 1167 ), 1168 } 1169 1170 RANGE_PARSERS: t.ClassVar = { 1171 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 1172 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 1173 TokenType.GLOB: binary_range_parser(exp.Glob), 1174 TokenType.ILIKE: binary_range_parser(exp.ILike), 1175 TokenType.IN: lambda self, this: self._parse_in(this), 1176 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 1177 TokenType.IS: lambda self, this: self._parse_is(this), 1178 TokenType.LIKE: binary_range_parser(exp.Like), 1179 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 1180 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 1181 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 1182 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 1183 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 1184 TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys), 1185 TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys), 1186 TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath), 1187 TokenType.ADJACENT: binary_range_parser(exp.Adjacent), 1188 TokenType.OPERATOR: lambda self, this: self._parse_operator(this), 1189 TokenType.AMP_LT: binary_range_parser(exp.ExtendsLeft), 1190 TokenType.AMP_GT: binary_range_parser(exp.ExtendsRight), 1191 } 1192 1193 PIPE_SYNTAX_TRANSFORM_PARSERS: t.ClassVar = { 1194 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 1195 "AS": lambda self, query: self._build_pipe_cte( 1196 query, [exp.Star()], self._parse_table_alias() 1197 ), 1198 "DISTINCT": lambda self, query: self._advance() or query.distinct(copy=False), 1199 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 1200 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 1201 "ORDER BY": lambda self, query: query.order_by( 1202 self._parse_order(), append=False, copy=False 1203 ), 1204 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 1205 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 1206 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 1207 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 1208 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 1209 } 1210 1211 PROPERTY_PARSERS: t.ClassVar[dict[str, t.Callable]] = { 1212 "ALLOWED_VALUES": lambda self: self.expression( 1213 exp.AllowedValuesProperty(expressions=self._parse_csv(self._parse_primary)) 1214 ), 1215 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 1216 "AUTO": lambda self: self._parse_auto_property(), 1217 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 1218 "BACKUP": lambda self: self.expression( 1219 exp.BackupProperty(this=self._parse_var(any_token=True)) 1220 ), 1221 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 1222 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 1223 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 1224 "CHECKSUM": lambda self: self._parse_checksum(), 1225 "CLUSTER BY": lambda self: self._parse_cluster(), 1226 "CLUSTERED": lambda self: self._parse_clustered_by(), 1227 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 1228 exp.CollateProperty, **kwargs 1229 ), 1230 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 1231 "CONTAINS": lambda self: self._parse_contains_property(), 1232 "COPY": lambda self: self._parse_copy_property(), 1233 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 1234 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 1235 "DEFINER": lambda self: self._parse_definer(), 1236 "DETERMINISTIC": lambda self: self.expression( 1237 exp.StabilityProperty(this=exp.Literal.string("IMMUTABLE")) 1238 ), 1239 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 1240 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 1241 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty()), 1242 "DISTKEY": lambda self: self._parse_distkey(), 1243 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 1244 "EMPTY": lambda self: self.expression(exp.EmptyProperty()), 1245 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 1246 "ENVIRONMENT": lambda self: self.expression( 1247 exp.EnviromentProperty(expressions=self._parse_wrapped_csv(self._parse_assignment)) 1248 ), 1249 "HANDLER": lambda self: self._parse_property_assignment(exp.HandlerProperty), 1250 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 1251 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty()), 1252 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1253 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1254 "FREESPACE": lambda self: self._parse_freespace(), 1255 "GLOBAL": lambda self: self.expression(exp.GlobalProperty()), 1256 "HEAP": lambda self: self.expression(exp.HeapProperty()), 1257 "ICEBERG": lambda self: self.expression(exp.IcebergProperty()), 1258 "IMMUTABLE": lambda self: self.expression( 1259 exp.StabilityProperty(this=exp.Literal.string("IMMUTABLE")) 1260 ), 1261 "INHERITS": lambda self: self.expression( 1262 exp.InheritsProperty(expressions=self._parse_wrapped_csv(self._parse_table)) 1263 ), 1264 "INPUT": lambda self: self.expression(exp.InputModelProperty(this=self._parse_schema())), 1265 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1266 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1267 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1268 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1269 "LIKE": lambda self: self._parse_create_like(), 1270 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1271 "LOCK": lambda self: self._parse_locking(), 1272 "LOCKING": lambda self: self._parse_locking(), 1273 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1274 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty()), 1275 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1276 "MODIFIES": lambda self: self._parse_modifies_property(), 1277 "MULTISET": lambda self: self.expression(exp.SetProperty(multi=True)), 1278 "NO": lambda self: self._parse_no_property(), 1279 "ON": lambda self: self._parse_on_property(), 1280 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1281 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty(this=self._parse_schema())), 1282 "PARTITION": lambda self: self._parse_partitioned_of(), 1283 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1284 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1285 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1286 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1287 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1288 "READS": lambda self: self._parse_reads_property(), 1289 "REMOTE": lambda self: self._parse_remote_with_connection(), 1290 "RETURNS": lambda self: self._parse_returns(), 1291 "STRICT": lambda self: self.expression(exp.StrictProperty()), 1292 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty()), 1293 "ROW": lambda self: self._parse_row(), 1294 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1295 "SAMPLE": lambda self: self.expression( 1296 exp.SampleProperty(this=self._match_text_seq("BY") and self._parse_bitwise()) 1297 ), 1298 "SECURE": lambda self: self.expression(exp.SecureProperty()), 1299 "SECURITY": lambda self: self._parse_sql_security(), 1300 "SQL SECURITY": lambda self: self._parse_sql_security(), 1301 "SET": lambda self: self.expression(exp.SetProperty(multi=False)), 1302 "SETTINGS": lambda self: self._parse_settings_property(), 1303 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1304 "SORTKEY": lambda self: self._parse_sortkey(), 1305 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1306 "STABLE": lambda self: self.expression( 1307 exp.StabilityProperty(this=exp.Literal.string("STABLE")) 1308 ), 1309 "STORED": lambda self: self._parse_stored(), 1310 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1311 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1312 "TEMP": lambda self: self.expression(exp.TemporaryProperty()), 1313 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty()), 1314 "TO": lambda self: self._parse_to_table(), 1315 "TRANSIENT": lambda self: self.expression(exp.TransientProperty()), 1316 "TRANSFORM": lambda self: self.expression( 1317 exp.TransformModelProperty(expressions=self._parse_wrapped_csv(self._parse_expression)) 1318 ), 1319 "TTL": lambda self: self._parse_ttl(), 1320 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1321 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty()), 1322 "VOLATILE": lambda self: self._parse_volatile_property(), 1323 "WITH": lambda self: self._parse_with_property(), 1324 } 1325 1326 CONSTRAINT_PARSERS: t.ClassVar = { 1327 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1328 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1329 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint(not_=False)), 1330 "CHARACTER SET": lambda self: self.expression( 1331 exp.CharacterSetColumnConstraint(this=self._parse_var_or_string()) 1332 ), 1333 "CHECK": lambda self: self._parse_check_constraint(), 1334 "COLLATE": lambda self: self.expression( 1335 exp.CollateColumnConstraint(this=self._parse_identifier() or self._parse_column()) 1336 ), 1337 "COMMENT": lambda self: self.expression( 1338 exp.CommentColumnConstraint(this=self._parse_string()) 1339 ), 1340 "COMPRESS": lambda self: self._parse_compress(), 1341 "CLUSTERED": lambda self: self.expression( 1342 exp.ClusteredColumnConstraint(this=self._parse_wrapped_csv(self._parse_ordered)) 1343 ), 1344 "NONCLUSTERED": lambda self: self.expression( 1345 exp.NonClusteredColumnConstraint(this=self._parse_wrapped_csv(self._parse_ordered)) 1346 ), 1347 "DEFAULT": lambda self: self.expression( 1348 exp.DefaultColumnConstraint(this=self._parse_bitwise()) 1349 ), 1350 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint(this=self._parse_var())), 1351 "EPHEMERAL": lambda self: self.expression( 1352 exp.EphemeralColumnConstraint(this=self._parse_bitwise()) 1353 ), 1354 "EXCLUDE": lambda self: self.expression( 1355 exp.ExcludeColumnConstraint(this=self._parse_index_params()) 1356 ), 1357 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1358 "FORMAT": lambda self: self.expression( 1359 exp.DateFormatColumnConstraint(this=self._parse_var_or_string()) 1360 ), 1361 "GENERATED": lambda self: self._parse_generated_as_identity(), 1362 "IDENTITY": lambda self: self._parse_auto_increment(), 1363 "INLINE": lambda self: self._parse_inline(), 1364 "LIKE": lambda self: self._parse_create_like(), 1365 "NOT": lambda self: self._parse_not_constraint(), 1366 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint(allow_null=True)), 1367 "ON": lambda self: ( 1368 ( 1369 self._match(TokenType.UPDATE) 1370 and self.expression(exp.OnUpdateColumnConstraint(this=self._parse_function())) 1371 ) 1372 or self.expression(exp.OnProperty(this=self._parse_id_var())) 1373 ), 1374 "PATH": lambda self: self.expression(exp.PathColumnConstraint(this=self._parse_string())), 1375 "PERIOD": lambda self: self._parse_period_for_system_time(), 1376 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1377 "REFERENCES": lambda self: self._parse_references(match=False), 1378 "TITLE": lambda self: self.expression( 1379 exp.TitleColumnConstraint(this=self._parse_var_or_string()) 1380 ), 1381 "TTL": lambda self: self.expression(exp.MergeTreeTTL(expressions=[self._parse_bitwise()])), 1382 "UNIQUE": lambda self: self._parse_unique(), 1383 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint()), 1384 "WITH": lambda self: self.expression( 1385 exp.Properties(expressions=self._parse_wrapped_properties()) 1386 ), 1387 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1388 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1389 } 1390 1391 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expr | None: 1392 if not self._match(TokenType.L_PAREN, advance=False): 1393 # Partitioning by bucket or truncate follows the syntax: 1394 # PARTITION BY (BUCKET(..) | TRUNCATE(..)) 1395 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1396 self._retreat(self._index - 1) 1397 return None 1398 1399 klass = ( 1400 exp.PartitionedByBucket 1401 if self._prev.text.upper() == "BUCKET" 1402 else exp.PartitionByTruncate 1403 ) 1404 1405 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1406 this, expression = seq_get(args, 0), seq_get(args, 1) 1407 1408 if isinstance(this, exp.Literal): 1409 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1410 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1411 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1412 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1413 # 1414 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1415 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1416 this, expression = expression, this 1417 1418 return self.expression(klass(this=this, expression=expression)) 1419 1420 ALTER_PARSERS: t.ClassVar = { 1421 "ADD": lambda self: self._parse_alter_table_add(), 1422 "AS": lambda self: self._parse_select(), 1423 "ALTER": lambda self: self._parse_alter_table_alter(), 1424 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1425 "DELETE": lambda self: self.expression(exp.Delete(where=self._parse_where())), 1426 "DROP": lambda self: self._parse_alter_table_drop(), 1427 "RENAME": lambda self: self._parse_alter_table_rename(), 1428 "SET": lambda self: self._parse_alter_table_set(), 1429 "SWAP": lambda self: self.expression( 1430 exp.SwapTable(this=self._match(TokenType.WITH) and self._parse_table(schema=True)) 1431 ), 1432 } 1433 1434 ALTER_ALTER_PARSERS: t.ClassVar = { 1435 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1436 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1437 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1438 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1439 } 1440 1441 SCHEMA_UNNAMED_CONSTRAINTS: t.ClassVar = { 1442 "CHECK", 1443 "EXCLUDE", 1444 "FOREIGN KEY", 1445 "LIKE", 1446 "PERIOD", 1447 "PRIMARY KEY", 1448 "UNIQUE", 1449 "BUCKET", 1450 "TRUNCATE", 1451 } 1452 1453 NO_PAREN_FUNCTION_PARSERS: t.ClassVar = { 1454 "ANY": lambda self: self.expression(exp.Any(this=self._parse_bitwise())), 1455 "CASE": lambda self: self._parse_case(), 1456 "CONNECT_BY_ROOT": lambda self: self.expression( 1457 exp.ConnectByRoot(this=self._parse_column()) 1458 ), 1459 "IF": lambda self: self._parse_if(), 1460 } 1461 1462 INVALID_FUNC_NAME_TOKENS: t.ClassVar = { 1463 TokenType.IDENTIFIER, 1464 TokenType.STRING, 1465 } 1466 1467 FUNCTIONS_WITH_ALIASED_ARGS: t.ClassVar = {"STRUCT"} 1468 1469 KEY_VALUE_DEFINITIONS: t.ClassVar = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1470 1471 FUNCTION_PARSERS: t.ClassVar[dict[str, t.Callable]] = { 1472 **{ 1473 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1474 }, 1475 **{ 1476 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1477 }, 1478 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1479 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1480 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1481 "CHAR": lambda self: self._parse_char(), 1482 "CHR": lambda self: self._parse_char(), 1483 "DECODE": lambda self: self._parse_decode(), 1484 "EXTRACT": lambda self: self._parse_extract(), 1485 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1486 "GAP_FILL": lambda self: self._parse_gap_fill(), 1487 "INITCAP": lambda self: self._parse_initcap(), 1488 "JSON_OBJECT": lambda self: self._parse_json_object(), 1489 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1490 "JSON_TABLE": lambda self: self._parse_json_table(), 1491 "MATCH": lambda self: self._parse_match_against(), 1492 "NORMALIZE": lambda self: self._parse_normalize(), 1493 "OPENJSON": lambda self: self._parse_open_json(), 1494 "OVERLAY": lambda self: self._parse_overlay(), 1495 "POSITION": lambda self: self._parse_position(), 1496 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1497 "STRING_AGG": lambda self: self._parse_string_agg(), 1498 "SUBSTRING": lambda self: self._parse_substring(), 1499 "TRIM": lambda self: self._parse_trim(), 1500 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1501 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1502 "XMLELEMENT": lambda self: self._parse_xml_element(), 1503 "XMLTABLE": lambda self: self._parse_xml_table(), 1504 } 1505 1506 QUERY_MODIFIER_PARSERS: t.ClassVar = { 1507 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1508 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1509 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1510 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1511 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1512 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1513 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1514 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1515 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1516 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1517 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1518 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1519 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1520 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1521 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1522 TokenType.CLUSTER_BY: lambda self: ( 1523 "cluster", 1524 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1525 ), 1526 TokenType.DISTRIBUTE_BY: lambda self: ( 1527 "distribute", 1528 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1529 ), 1530 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1531 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1532 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1533 } 1534 QUERY_MODIFIER_TOKENS: t.ClassVar = set(QUERY_MODIFIER_PARSERS) 1535 1536 SET_PARSERS: t.ClassVar = { 1537 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1538 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1539 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1540 "TRANSACTION": lambda self: self._parse_set_transaction(), 1541 } 1542 1543 SHOW_PARSERS: t.ClassVar[dict[str, t.Callable]] = {} 1544 1545 TYPE_LITERAL_PARSERS: t.ClassVar = { 1546 exp.DType.JSON: lambda self, this, _: self.expression(exp.ParseJSON(this=this)), 1547 } 1548 1549 TYPE_CONVERTERS: t.ClassVar[dict[exp.DType, t.Callable[[exp.DataType], exp.DataType]]] = {} 1550 1551 DDL_SELECT_TOKENS: t.ClassVar = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1552 1553 PRE_VOLATILE_TOKENS: t.ClassVar = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1554 1555 TRANSACTION_KIND: t.ClassVar = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1556 TRANSACTION_CHARACTERISTICS: t.ClassVar[OPTIONS_TYPE] = { 1557 "ISOLATION": ( 1558 ("LEVEL", "REPEATABLE", "READ"), 1559 ("LEVEL", "READ", "COMMITTED"), 1560 ("LEVEL", "READ", "UNCOMITTED"), 1561 ("LEVEL", "SERIALIZABLE"), 1562 ), 1563 "READ": ("WRITE", "ONLY"), 1564 } 1565 1566 CONFLICT_ACTIONS: t.ClassVar[OPTIONS_TYPE] = { 1567 **dict.fromkeys(("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()), 1568 "DO": ("NOTHING", "UPDATE"), 1569 } 1570 1571 TRIGGER_TIMING: t.ClassVar[OPTIONS_TYPE] = { 1572 "INSTEAD": (("OF",),), 1573 "BEFORE": tuple(), 1574 "AFTER": tuple(), 1575 } 1576 1577 TRIGGER_DEFERRABLE: t.ClassVar[OPTIONS_TYPE] = { 1578 "NOT": (("DEFERRABLE",),), 1579 "DEFERRABLE": tuple(), 1580 } 1581 1582 CREATE_SEQUENCE: t.ClassVar[OPTIONS_TYPE] = { 1583 "SCALE": ("EXTEND", "NOEXTEND"), 1584 "SHARD": ("EXTEND", "NOEXTEND"), 1585 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1586 **dict.fromkeys( 1587 ( 1588 "SESSION", 1589 "GLOBAL", 1590 "KEEP", 1591 "NOKEEP", 1592 "ORDER", 1593 "NOORDER", 1594 "NOCACHE", 1595 "CYCLE", 1596 "NOCYCLE", 1597 "NOMINVALUE", 1598 "NOMAXVALUE", 1599 "NOSCALE", 1600 "NOSHARD", 1601 ), 1602 tuple(), 1603 ), 1604 } 1605 1606 ISOLATED_LOADING_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {"FOR": ("ALL", "INSERT", "NONE")} 1607 1608 USABLES: t.ClassVar[OPTIONS_TYPE] = dict.fromkeys( 1609 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1610 ) 1611 1612 CAST_ACTIONS: t.ClassVar[OPTIONS_TYPE] = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1613 1614 SCHEMA_BINDING_OPTIONS: t.ClassVar[OPTIONS_TYPE] = { 1615 "TYPE": ("EVOLUTION",), 1616 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1617 } 1618 1619 PROCEDURE_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {} 1620 1621 EXECUTE_AS_OPTIONS: t.ClassVar[OPTIONS_TYPE] = dict.fromkeys( 1622 ("CALLER", "SELF", "OWNER"), tuple() 1623 ) 1624 1625 KEY_CONSTRAINT_OPTIONS: t.ClassVar[OPTIONS_TYPE] = { 1626 "NOT": ("ENFORCED",), 1627 "MATCH": ( 1628 "FULL", 1629 "PARTIAL", 1630 "SIMPLE", 1631 ), 1632 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1633 "USING": ( 1634 "BTREE", 1635 "HASH", 1636 ), 1637 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1638 } 1639 1640 WINDOW_EXCLUDE_OPTIONS: t.ClassVar[OPTIONS_TYPE] = { 1641 "NO": ("OTHERS",), 1642 "CURRENT": ("ROW",), 1643 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1644 } 1645 1646 INSERT_ALTERNATIVES: t.ClassVar = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1647 1648 CLONE_KEYWORDS: t.ClassVar = {"CLONE", "COPY"} 1649 HISTORICAL_DATA_PREFIX: t.ClassVar = {"AT", "BEFORE", "END"} 1650 HISTORICAL_DATA_KIND: t.ClassVar = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1651 1652 OPCLASS_FOLLOW_KEYWORDS: t.ClassVar = {"ASC", "DESC", "NULLS", "WITH"} 1653 1654 OPTYPE_FOLLOW_TOKENS: t.ClassVar = {TokenType.COMMA, TokenType.R_PAREN} 1655 1656 TABLE_INDEX_HINT_TOKENS: t.ClassVar = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1657 1658 VIEW_ATTRIBUTES: t.ClassVar = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1659 1660 WINDOW_ALIAS_TOKENS: t.ClassVar = ID_VAR_TOKENS - {TokenType.RANGE, TokenType.ROWS} 1661 WINDOW_BEFORE_PAREN_TOKENS: t.ClassVar = {TokenType.OVER} 1662 WINDOW_SIDES: t.ClassVar = {"FOLLOWING", "PRECEDING"} 1663 1664 JSON_KEY_VALUE_SEPARATOR_TOKENS: t.ClassVar = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1665 1666 FETCH_TOKENS: t.ClassVar = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1667 1668 ADD_CONSTRAINT_TOKENS: t.ClassVar = { 1669 TokenType.CONSTRAINT, 1670 TokenType.FOREIGN_KEY, 1671 TokenType.INDEX, 1672 TokenType.KEY, 1673 TokenType.PRIMARY_KEY, 1674 TokenType.UNIQUE, 1675 } 1676 1677 DISTINCT_TOKENS: t.ClassVar = {TokenType.DISTINCT} 1678 1679 UNNEST_OFFSET_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1680 1681 SELECT_START_TOKENS: t.ClassVar = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1682 1683 COPY_INTO_VARLEN_OPTIONS: t.ClassVar = { 1684 "FILE_FORMAT", 1685 "COPY_OPTIONS", 1686 "FORMAT_OPTIONS", 1687 "CREDENTIAL", 1688 } 1689 1690 IS_JSON_PREDICATE_KIND: t.ClassVar = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1691 1692 ODBC_DATETIME_LITERALS: t.ClassVar[dict[str, type[exp.Expr]]] = {} 1693 1694 ON_CONDITION_TOKENS: t.ClassVar = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1695 1696 PRIVILEGE_FOLLOW_TOKENS: t.ClassVar = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1697 1698 # The style options for the DESCRIBE statement 1699 DESCRIBE_STYLES: t.ClassVar = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1700 1701 SET_ASSIGNMENT_DELIMITERS: t.ClassVar = {"=", ":=", "TO"} 1702 1703 # The style options for the ANALYZE statement 1704 ANALYZE_STYLES: t.ClassVar = { 1705 "BUFFER_USAGE_LIMIT", 1706 "FULL", 1707 "LOCAL", 1708 "NO_WRITE_TO_BINLOG", 1709 "SAMPLE", 1710 "SKIP_LOCKED", 1711 "VERBOSE", 1712 } 1713 1714 ANALYZE_EXPRESSION_PARSERS: t.ClassVar = { 1715 "ALL": lambda self: self._parse_analyze_columns(), 1716 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1717 "DELETE": lambda self: self._parse_analyze_delete(), 1718 "DROP": lambda self: self._parse_analyze_histogram(), 1719 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1720 "LIST": lambda self: self._parse_analyze_list(), 1721 "PREDICATE": lambda self: self._parse_analyze_columns(), 1722 "UPDATE": lambda self: self._parse_analyze_histogram(), 1723 "VALIDATE": lambda self: self._parse_analyze_validate(), 1724 } 1725 1726 PARTITION_KEYWORDS: t.ClassVar = {"PARTITION", "SUBPARTITION"} 1727 1728 AMBIGUOUS_ALIAS_TOKENS: t.ClassVar = (TokenType.LIMIT, TokenType.OFFSET) 1729 1730 OPERATION_MODIFIERS: t.ClassVar[set[str]] = set() 1731 1732 RECURSIVE_CTE_SEARCH_KIND: t.ClassVar = {"BREADTH", "DEPTH", "CYCLE"} 1733 1734 SECURITY_PROPERTY_KEYWORDS: t.ClassVar = {"DEFINER", "INVOKER", "NONE"} 1735 1736 MODIFIABLES: t.ClassVar = (exp.Query, exp.Table, exp.TableFromRows, exp.Values) 1737 1738 STRICT_CAST: t.ClassVar = True 1739 1740 PREFIXED_PIVOT_COLUMNS: t.ClassVar = False 1741 IDENTIFY_PIVOT_STRINGS: t.ClassVar = False 1742 1743 LOG_DEFAULTS_TO_LN: t.ClassVar = False 1744 1745 # Whether the table sample clause expects CSV syntax 1746 TABLESAMPLE_CSV: t.ClassVar = False 1747 1748 # The default method used for table sampling 1749 DEFAULT_SAMPLING_METHOD: t.ClassVar[str | None] = None 1750 1751 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1752 SET_REQUIRES_ASSIGNMENT_DELIMITER: t.ClassVar = True 1753 1754 # Whether the TRIM function expects the characters to trim as its first argument 1755 TRIM_PATTERN_FIRST: t.ClassVar = False 1756 1757 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1758 STRING_ALIASES: t.ClassVar = False 1759 1760 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1761 MODIFIERS_ATTACHED_TO_SET_OP: t.ClassVar = True 1762 SET_OP_MODIFIERS: t.ClassVar = {"order", "limit", "offset"} 1763 1764 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1765 NO_PAREN_IF_COMMANDS: t.ClassVar = True 1766 1767 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1768 JSON_ARROWS_REQUIRE_JSON_TYPE: t.ClassVar = False 1769 1770 # Whether the `:` operator is used to extract a value from a VARIANT column 1771 COLON_IS_VARIANT_EXTRACT: t.ClassVar = False 1772 1773 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1774 # If this is True and '(' is not found, the keyword will be treated as an identifier 1775 VALUES_FOLLOWED_BY_PAREN: t.ClassVar = True 1776 1777 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1778 SUPPORTS_IMPLICIT_UNNEST: t.ClassVar = False 1779 1780 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1781 INTERVAL_SPANS: t.ClassVar = True 1782 1783 # Whether a PARTITION clause can follow a table reference 1784 SUPPORTS_PARTITION_SELECTION: t.ClassVar = False 1785 1786 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1787 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT: t.ClassVar = True 1788 1789 # Whether the 'AS' keyword is optional in the CTE definition syntax 1790 OPTIONAL_ALIAS_TOKEN_CTE: t.ClassVar = True 1791 1792 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1793 ALTER_RENAME_REQUIRES_COLUMN: t.ClassVar = True 1794 1795 # Whether Alter statements are allowed to contain Partition specifications 1796 ALTER_TABLE_PARTITIONS: t.ClassVar = False 1797 1798 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1799 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1800 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1801 # as BigQuery, where all joins have the same precedence. 1802 JOINS_HAVE_EQUAL_PRECEDENCE: t.ClassVar = False 1803 1804 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1805 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR: t.ClassVar = False 1806 1807 # Whether map literals support arbitrary expressions as keys. 1808 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1809 # When False, keys are typically restricted to identifiers. 1810 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: t.ClassVar = False 1811 1812 # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g this 1813 # is true for Snowflake but not for BigQuery which can also process strings 1814 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION: t.ClassVar = False 1815 1816 # Dialects like Databricks support JOINS without join criteria 1817 # Adding an ON TRUE, makes transpilation semantically correct for other dialects 1818 ADD_JOIN_ON_TRUE: t.ClassVar = False 1819 1820 # Whether INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]' 1821 # can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND` 1822 SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT: t.ClassVar = False 1823 1824 SHOW_TRIE: t.ClassVar[dict] = new_trie(key.split(" ") for key in SHOW_PARSERS) 1825 SET_TRIE: t.ClassVar[dict] = new_trie(key.split(" ") for key in SET_PARSERS) 1826 1827 def __init__( 1828 self, 1829 error_level: ErrorLevel | None = None, 1830 error_message_context: int = 100, 1831 max_errors: int = 3, 1832 max_nodes: int = -1, 1833 dialect: DialectType = None, 1834 ): 1835 self.error_level: ErrorLevel = error_level or ErrorLevel.IMMEDIATE 1836 self.error_message_context: int = error_message_context 1837 self.max_errors: int = max_errors 1838 self.max_nodes: int = max_nodes 1839 self.dialect: t.Any = _resolve_dialect(dialect) 1840 self.sql: str = "" 1841 self.errors: list[ParseError] = [] 1842 self._tokens: list[Token] = [] 1843 self._tokens_size: i64 = 0 1844 self._index: i64 = 0 1845 self._curr: Token = SENTINEL_NONE 1846 self._next: Token = SENTINEL_NONE 1847 self._prev: Token = SENTINEL_NONE 1848 self._prev_comments: list[str] = [] 1849 self._pipe_cte_counter: int = 0 1850 self._chunks: list[list[Token]] = [] 1851 self._chunk_index: i64 = 0 1852 self._node_count: int = 0 1853 1854 def reset(self) -> None: 1855 self.sql = "" 1856 self.errors = [] 1857 self._tokens = [] 1858 self._tokens_size = 0 1859 self._index = 0 1860 self._curr = SENTINEL_NONE 1861 self._next = SENTINEL_NONE 1862 self._prev = SENTINEL_NONE 1863 self._prev_comments = [] 1864 self._pipe_cte_counter = 0 1865 self._chunks = [] 1866 self._chunk_index = 0 1867 self._node_count = 0 1868 1869 def _advance(self, times: i64 = 1) -> None: 1870 index = self._index + times 1871 self._index = index 1872 tokens = self._tokens 1873 size = self._tokens_size 1874 self._curr = tokens[index] if index < size else SENTINEL_NONE 1875 self._next = tokens[index + 1] if index + 1 < size else SENTINEL_NONE 1876 1877 if index > 0: 1878 prev = tokens[index - 1] 1879 self._prev = prev 1880 self._prev_comments = prev.comments 1881 else: 1882 self._prev = SENTINEL_NONE 1883 self._prev_comments = [] 1884 1885 def _advance_chunk(self) -> None: 1886 self._index = -1 1887 self._tokens = self._chunks[self._chunk_index] 1888 self._tokens_size = i64(len(self._tokens)) 1889 self._chunk_index += 1 1890 self._advance() 1891 1892 def _retreat(self, index: i64) -> None: 1893 if index != self._index: 1894 self._advance(index - self._index) 1895 1896 def _add_comments(self, expression: exp.Expr | None) -> None: 1897 if expression and self._prev_comments: 1898 expression.add_comments(self._prev_comments) 1899 self._prev_comments = [] 1900 1901 def _match( 1902 self, token_type: TokenType, advance: bool = True, expression: exp.Expr | None = None 1903 ) -> bool: 1904 if self._curr.token_type == token_type: 1905 if advance: 1906 self._advance() 1907 self._add_comments(expression) 1908 return True 1909 return False 1910 1911 def _match_set(self, types: t.Collection[TokenType], advance: bool = True) -> bool: 1912 if self._curr.token_type in types: 1913 if advance: 1914 self._advance() 1915 return True 1916 return False 1917 1918 def _match_pair( 1919 self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True 1920 ) -> bool: 1921 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 1922 if advance: 1923 self._advance(2) 1924 return True 1925 return False 1926 1927 def _match_texts(self, texts: t.Collection[str], advance: bool = True) -> bool: 1928 if self._curr.token_type != TokenType.STRING and self._curr.text.upper() in texts: 1929 if advance: 1930 self._advance() 1931 return True 1932 return False 1933 1934 def _match_text_seq(self, *texts: str, advance: bool = True) -> bool: 1935 index = self._index 1936 string_type = TokenType.STRING 1937 for text in texts: 1938 if self._curr.token_type != string_type and self._curr.text.upper() == text: 1939 self._advance() 1940 else: 1941 self._retreat(index) 1942 return False 1943 1944 if not advance: 1945 self._retreat(index) 1946 1947 return True 1948 1949 def _is_connected(self) -> bool: 1950 prev = self._prev 1951 curr = self._curr 1952 return bool(prev and curr and prev.end + 1 == curr.start) 1953 1954 def _find_sql(self, start: Token, end: Token) -> str: 1955 return self.sql[start.start : end.end + 1] 1956 1957 def raise_error(self, message: str, token: Token = SENTINEL_NONE) -> None: 1958 token = token or self._curr or self._prev or Token.string("") 1959 formatted_sql, start_context, highlight, end_context = highlight_sql( 1960 sql=self.sql, 1961 positions=[(token.start, token.end)], 1962 context_length=self.error_message_context, 1963 ) 1964 formatted_message = f"{message}. Line {token.line}, Col: {token.col}.\n {formatted_sql}" 1965 1966 error = ParseError.new( 1967 formatted_message, 1968 description=message, 1969 line=token.line, 1970 col=token.col, 1971 start_context=start_context, 1972 highlight=highlight, 1973 end_context=end_context, 1974 ) 1975 1976 if self.error_level == ErrorLevel.IMMEDIATE: 1977 raise error 1978 1979 self.errors.append(error) 1980 1981 def validate_expression(self, expression: E, args: list | None = None) -> E: 1982 if self.max_nodes > -1: 1983 self._node_count += 1 1984 if self._node_count > self.max_nodes: 1985 self.raise_error(f"Maximum number of AST nodes ({self.max_nodes}) exceeded") 1986 if self.error_level != ErrorLevel.IGNORE: 1987 for error_message in expression.error_messages(args): 1988 self.raise_error(error_message) 1989 return expression 1990 1991 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> T | None: 1992 index = self._index 1993 error_level = self.error_level 1994 this: T | None = None 1995 1996 self.error_level = ErrorLevel.IMMEDIATE 1997 try: 1998 this = parse_method() 1999 except ParseError: 2000 this = None 2001 finally: 2002 if not this or retreat: 2003 self._retreat(index) 2004 self.error_level = error_level 2005 2006 return this 2007 2008 def parse(self, raw_tokens: list[Token], sql: str) -> list[exp.Expr | None]: 2009 """ 2010 Parses a list of tokens and returns a list of syntax trees, one tree 2011 per parsed SQL statement. 2012 2013 Args: 2014 raw_tokens: The list of tokens. 2015 sql: The original SQL string. 2016 2017 Returns: 2018 The list of the produced syntax trees. 2019 """ 2020 return self._parse( 2021 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 2022 ) 2023 2024 def parse_into( 2025 self, 2026 expression_types: exp.IntoType, 2027 raw_tokens: list[Token], 2028 sql: str | None = None, 2029 ) -> list[exp.Expr | None]: 2030 """ 2031 Parses a list of tokens into a given Expr type. If a collection of Expr 2032 types is given instead, this method will try to parse the token list into each one 2033 of them, stopping at the first for which the parsing succeeds. 2034 2035 Args: 2036 expression_types: The expression type(s) to try and parse the token list into. 2037 raw_tokens: The list of tokens. 2038 sql: The original SQL string, used to produce helpful debug messages. 2039 2040 Returns: 2041 The target Expr. 2042 """ 2043 errors = [] 2044 for expression_type in ensure_list(expression_types): 2045 parser = self.EXPRESSION_PARSERS.get(t.cast(type[exp.Expr], expression_type)) 2046 if not parser: 2047 raise TypeError(f"No parser registered for {expression_type}") 2048 2049 try: 2050 return self._parse(parser, raw_tokens, sql) 2051 except ParseError as e: 2052 e.errors[0]["into_expression"] = expression_type 2053 errors.append(e) 2054 2055 raise ParseError( 2056 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 2057 errors=merge_errors(errors), 2058 ) from errors[-1] 2059 2060 def check_errors(self) -> None: 2061 """Logs or raises any found errors, depending on the chosen error level setting.""" 2062 if self.error_level == ErrorLevel.WARN: 2063 for error in self.errors: 2064 logger.error(str(error)) 2065 elif self.error_level == ErrorLevel.RAISE and self.errors: 2066 raise ParseError( 2067 concat_messages(self.errors, self.max_errors), 2068 errors=merge_errors(self.errors), 2069 ) 2070 2071 def expression( 2072 self, 2073 instance: E, 2074 token: Token | None = None, 2075 comments: list[str] | None = None, 2076 ) -> E: 2077 if token: 2078 instance.update_positions(token) 2079 instance.add_comments(comments) if comments else self._add_comments(instance) 2080 if not instance.is_primitive: 2081 instance = self.validate_expression(instance) 2082 return instance 2083 2084 def _parse_batch_statements( 2085 self, 2086 parse_method: t.Callable[[Parser], exp.Expr | None], 2087 sep_first_statement: bool = True, 2088 ) -> list[exp.Expr | None]: 2089 expressions = [] 2090 2091 # Chunkification binds if/while statements with the first statement of the body 2092 if sep_first_statement: 2093 self._match(TokenType.BEGIN) 2094 expressions.append(parse_method(self)) 2095 2096 chunks_length = len(self._chunks) 2097 while self._chunk_index < chunks_length: 2098 self._advance_chunk() 2099 2100 if self._match(TokenType.ELSE, advance=False): 2101 return expressions 2102 2103 if expressions and not self._next and self._match(TokenType.END): 2104 expressions.append(exp.EndStatement()) 2105 continue 2106 2107 expressions.append(parse_method(self)) 2108 2109 if self._index < self._tokens_size: 2110 self.raise_error("Invalid expression / Unexpected token") 2111 2112 self.check_errors() 2113 2114 return expressions 2115 2116 def _parse( 2117 self, 2118 parse_method: t.Callable[[Parser], exp.Expr | None], 2119 raw_tokens: list[Token], 2120 sql: str | None = None, 2121 ) -> list[exp.Expr | None]: 2122 self.reset() 2123 self.sql = sql or "" 2124 2125 total = len(raw_tokens) 2126 chunks: list[list[Token]] = [[]] 2127 2128 for i, token in enumerate(raw_tokens): 2129 if token.token_type == TokenType.SEMICOLON: 2130 if token.comments: 2131 chunks.append([token]) 2132 2133 if i < total - 1: 2134 chunks.append([]) 2135 else: 2136 chunks[-1].append(token) 2137 2138 self._chunks = chunks 2139 2140 return self._parse_batch_statements(parse_method=parse_method, sep_first_statement=False) 2141 2142 def _warn_unsupported(self) -> None: 2143 if self._tokens_size <= 1: 2144 return 2145 2146 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 2147 # interested in emitting a warning for the one being currently processed. 2148 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 2149 2150 logger.warning( 2151 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 2152 ) 2153 2154 def _parse_command(self) -> exp.Command: 2155 self._warn_unsupported() 2156 comments = self._prev_comments 2157 return self.expression( 2158 exp.Command(this=self._prev.text.upper(), expression=self._parse_string()), 2159 comments=comments, 2160 ) 2161 2162 def _parse_comment(self, allow_exists: bool = True) -> exp.Expr: 2163 start = self._prev 2164 exists = self._parse_exists() if allow_exists else None 2165 2166 self._match(TokenType.ON) 2167 2168 materialized = self._match_text_seq("MATERIALIZED") 2169 kind = self._match_set(self.CREATABLES) and self._prev 2170 if not kind: 2171 return self._parse_as_command(start) 2172 2173 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2174 this = self._parse_user_defined_function(kind=kind.token_type) 2175 elif kind.token_type == TokenType.TABLE: 2176 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 2177 elif kind.token_type == TokenType.COLUMN: 2178 this = self._parse_column() 2179 else: 2180 this = self._parse_id_var() 2181 2182 self._match(TokenType.IS) 2183 2184 return self.expression( 2185 exp.Comment( 2186 this=this, 2187 kind=kind.text, 2188 expression=self._parse_string(), 2189 exists=exists, 2190 materialized=materialized, 2191 ) 2192 ) 2193 2194 def _parse_to_table( 2195 self, 2196 ) -> exp.ToTableProperty: 2197 table = self._parse_table_parts(schema=True) 2198 return self.expression(exp.ToTableProperty(this=table)) 2199 2200 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 2201 def _parse_ttl(self) -> exp.Expr: 2202 def _parse_ttl_action() -> exp.Expr | None: 2203 this = self._parse_bitwise() 2204 2205 if self._match_text_seq("DELETE"): 2206 return self.expression(exp.MergeTreeTTLAction(this=this, delete=True)) 2207 if self._match_text_seq("RECOMPRESS"): 2208 return self.expression( 2209 exp.MergeTreeTTLAction(this=this, recompress=self._parse_bitwise()) 2210 ) 2211 if self._match_text_seq("TO", "DISK"): 2212 return self.expression( 2213 exp.MergeTreeTTLAction(this=this, to_disk=self._parse_string()) 2214 ) 2215 if self._match_text_seq("TO", "VOLUME"): 2216 return self.expression( 2217 exp.MergeTreeTTLAction(this=this, to_volume=self._parse_string()) 2218 ) 2219 2220 return this 2221 2222 expressions = self._parse_csv(_parse_ttl_action) 2223 where = self._parse_where() 2224 group = self._parse_group() 2225 2226 aggregates = None 2227 if group and self._match(TokenType.SET): 2228 aggregates = self._parse_csv(self._parse_set_item) 2229 2230 return self.expression( 2231 exp.MergeTreeTTL( 2232 expressions=expressions, where=where, group=group, aggregates=aggregates 2233 ) 2234 ) 2235 2236 def _parse_condition(self) -> exp.Expr | None: 2237 return self._parse_wrapped(parse_method=self._parse_expression, optional=True) 2238 2239 def _parse_block(self) -> exp.Block: 2240 return self.expression( 2241 exp.Block( 2242 expressions=self._parse_batch_statements( 2243 parse_method=lambda self: self._parse_statement() 2244 ) 2245 ) 2246 ) 2247 2248 def _parse_whileblock(self) -> exp.WhileBlock: 2249 return self.expression( 2250 exp.WhileBlock(this=self._parse_condition(), body=self._parse_block()) 2251 ) 2252 2253 def _parse_statement(self) -> exp.Expr | None: 2254 if not self._curr: 2255 return None 2256 2257 if self._match_set(self.STATEMENT_PARSERS): 2258 comments = self._prev_comments 2259 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 2260 stmt.add_comments(comments, prepend=True) 2261 return stmt 2262 2263 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 2264 return self._parse_command() 2265 2266 if self._match_text_seq("WHILE"): 2267 return self._parse_whileblock() 2268 2269 expression = self._parse_expression() 2270 expression = self._parse_set_operations(expression) if expression else self._parse_select() 2271 2272 if isinstance(expression, exp.Subquery) and self._match(TokenType.PIPE_GT, advance=False): 2273 expression = self._parse_pipe_syntax_query(expression) 2274 2275 return self._parse_query_modifiers(expression) 2276 2277 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 2278 start = self._prev 2279 temporary = self._match(TokenType.TEMPORARY) 2280 materialized = self._match_text_seq("MATERIALIZED") 2281 iceberg = self._match_text_seq("ICEBERG") 2282 2283 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 2284 if not kind or (iceberg and kind and kind != "TABLE"): 2285 return self._parse_as_command(start) 2286 2287 concurrently = self._match_text_seq("CONCURRENTLY") 2288 if_exists = exists or self._parse_exists() 2289 2290 if kind == "COLUMN": 2291 this = self._parse_column() 2292 else: 2293 this = self._parse_table_parts(schema=True, is_db_reference=kind == "SCHEMA") 2294 2295 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 2296 2297 if self._match(TokenType.L_PAREN, advance=False): 2298 expressions = self._parse_wrapped_csv(self._parse_types) 2299 else: 2300 expressions = None 2301 2302 cascade_or_restrict = self._match_texts(("CASCADE", "RESTRICT")) and self._prev.text.upper() 2303 2304 return self.expression( 2305 exp.Drop( 2306 exists=if_exists, 2307 this=this, 2308 expressions=expressions, 2309 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 2310 temporary=temporary, 2311 materialized=materialized, 2312 cascade=cascade_or_restrict == "CASCADE", 2313 restrict=cascade_or_restrict == "RESTRICT", 2314 constraints=self._match_text_seq("CONSTRAINTS"), 2315 purge=self._match_text_seq("PURGE"), 2316 cluster=cluster, 2317 concurrently=concurrently, 2318 sync=self._match_text_seq("SYNC"), 2319 iceberg=iceberg, 2320 ) 2321 ) 2322 2323 def _parse_exists(self, not_: bool = False) -> bool | None: 2324 return ( 2325 self._match_text_seq("IF") 2326 and (not not_ or self._match(TokenType.NOT)) 2327 and self._match(TokenType.EXISTS) 2328 ) 2329 2330 def _parse_create(self) -> exp.Create | exp.Command: 2331 # Note: this can't be None because we've matched a statement parser 2332 start = self._prev 2333 2334 replace = ( 2335 start.token_type == TokenType.REPLACE 2336 or self._match_pair(TokenType.OR, TokenType.REPLACE) 2337 or self._match_pair(TokenType.OR, TokenType.ALTER) 2338 ) 2339 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 2340 2341 unique = self._match(TokenType.UNIQUE) 2342 2343 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 2344 clustered = True 2345 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 2346 "COLUMNSTORE" 2347 ): 2348 clustered = False 2349 else: 2350 clustered = None 2351 2352 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2353 self._advance() 2354 2355 properties = None 2356 create_token = self._match_set(self.CREATABLES) and self._prev 2357 2358 if not create_token: 2359 # exp.Properties.Location.POST_CREATE 2360 properties = self._parse_properties() 2361 create_token = self._match_set(self.CREATABLES) and self._prev 2362 2363 if not properties or not create_token: 2364 return self._parse_as_command(start) 2365 2366 create_token_type = t.cast(Token, create_token).token_type 2367 2368 concurrently = self._match_text_seq("CONCURRENTLY") 2369 exists = self._parse_exists(not_=True) 2370 this = None 2371 expression: exp.Expr | None = None 2372 indexes = None 2373 no_schema_binding = None 2374 begin = None 2375 clone = None 2376 2377 def extend_props(temp_props: exp.Properties | None) -> None: 2378 nonlocal properties 2379 if properties and temp_props: 2380 properties.expressions.extend(temp_props.expressions) 2381 elif temp_props: 2382 properties = temp_props 2383 2384 if create_token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2385 this = self._parse_user_defined_function(kind=create_token_type) 2386 2387 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 2388 extend_props(self._parse_properties()) 2389 2390 expression = self._parse_heredoc() if self._match(TokenType.ALIAS) else None 2391 extend_props(self._parse_function_properties()) 2392 2393 if not expression: 2394 if self._match(TokenType.COMMAND): 2395 expression = self._parse_as_command(self._prev) 2396 else: 2397 begin = self._match(TokenType.BEGIN) 2398 return_ = self._match_text_seq("RETURN") 2399 2400 if self._match(TokenType.STRING, advance=False): 2401 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2402 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2403 expression = self._parse_string() 2404 extend_props(self._parse_properties()) 2405 else: 2406 expression = ( 2407 self._parse_user_defined_function_expression() 2408 if create_token_type == TokenType.FUNCTION 2409 else self._parse_block() 2410 ) 2411 2412 if return_: 2413 expression = self.expression(exp.Return(this=expression)) 2414 elif create_token_type == TokenType.INDEX: 2415 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2416 if not self._match(TokenType.ON): 2417 index = self._parse_id_var() 2418 anonymous = False 2419 else: 2420 index = None 2421 anonymous = True 2422 2423 this = self._parse_index(index=index, anonymous=anonymous) 2424 elif ( 2425 create_token_type == TokenType.CONSTRAINT and self._match(TokenType.TRIGGER) 2426 ) or create_token_type == TokenType.TRIGGER: 2427 if is_constraint := (create_token_type == TokenType.CONSTRAINT): 2428 create_token = self._prev 2429 2430 trigger_name = self._parse_id_var() 2431 if not trigger_name: 2432 return self._parse_as_command(start) 2433 2434 timing_var = self._parse_var_from_options(self.TRIGGER_TIMING, raise_unmatched=False) 2435 timing = timing_var.this if timing_var else None 2436 if not timing: 2437 return self._parse_as_command(start) 2438 2439 events = self._parse_trigger_events() 2440 if not self._match(TokenType.ON): 2441 self.raise_error("Expected ON in trigger definition") 2442 2443 table = self._parse_table_parts() 2444 referenced_table = self._parse_table_parts() if self._match(TokenType.FROM) else None 2445 deferrable, initially = self._parse_trigger_deferrable() 2446 referencing = self._parse_trigger_referencing() 2447 for_each = self._parse_trigger_for_each() 2448 when = self._match_text_seq("WHEN") and self._parse_wrapped( 2449 self._parse_disjunction, optional=True 2450 ) 2451 execute = self._parse_trigger_execute() 2452 2453 if execute is None: 2454 return self._parse_as_command(start) 2455 2456 trigger_props = self.expression( 2457 exp.TriggerProperties( 2458 table=table, 2459 timing=timing, 2460 events=events, 2461 execute=execute, 2462 constraint=is_constraint, 2463 referenced_table=referenced_table, 2464 deferrable=deferrable, 2465 initially=initially, 2466 referencing=referencing, 2467 for_each=for_each, 2468 when=when, 2469 ) 2470 ) 2471 2472 this = trigger_name 2473 extend_props(exp.Properties(expressions=[trigger_props] if trigger_props else [])) 2474 elif create_token_type in self.DB_CREATABLES: 2475 table_parts = self._parse_table_parts( 2476 schema=True, is_db_reference=create_token_type == TokenType.SCHEMA 2477 ) 2478 2479 # exp.Properties.Location.POST_NAME 2480 self._match(TokenType.COMMA) 2481 extend_props(self._parse_properties(before=True)) 2482 2483 this = self._parse_schema(this=table_parts) 2484 2485 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2486 extend_props(self._parse_properties()) 2487 2488 has_alias = self._match(TokenType.ALIAS) 2489 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2490 # exp.Properties.Location.POST_ALIAS 2491 extend_props(self._parse_properties()) 2492 2493 if create_token_type == TokenType.SEQUENCE: 2494 expression = self._parse_types() 2495 props = self._parse_properties() 2496 if props: 2497 sequence_props = exp.SequenceProperties() 2498 options = [] 2499 for prop in props: 2500 if isinstance(prop, exp.SequenceProperties): 2501 for arg, value in prop.args.items(): 2502 if arg == "options": 2503 options.extend(value) 2504 else: 2505 sequence_props.set(arg, value) 2506 prop.pop() 2507 2508 if options: 2509 sequence_props.set("options", options) 2510 2511 props.append("expressions", sequence_props) 2512 extend_props(props) 2513 else: 2514 expression = self._parse_ddl_select() 2515 2516 # Some dialects also support using a table as an alias instead of a SELECT. 2517 # Here we fallback to this as an alternative. 2518 if not expression and has_alias: 2519 expression = self._try_parse(self._parse_table_parts) 2520 2521 if create_token_type == TokenType.TABLE: 2522 # exp.Properties.Location.POST_EXPRESSION 2523 extend_props(self._parse_properties()) 2524 2525 indexes = [] 2526 while True: 2527 index = self._parse_index() 2528 2529 # exp.Properties.Location.POST_INDEX 2530 extend_props(self._parse_properties()) 2531 if not index: 2532 break 2533 else: 2534 self._match(TokenType.COMMA) 2535 indexes.append(index) 2536 elif create_token_type == TokenType.VIEW: 2537 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2538 no_schema_binding = True 2539 elif create_token_type in (TokenType.SINK, TokenType.SOURCE): 2540 extend_props(self._parse_properties()) 2541 2542 shallow = self._match_text_seq("SHALLOW") 2543 2544 if self._match_texts(self.CLONE_KEYWORDS): 2545 copy = self._prev.text.lower() == "copy" 2546 clone = self.expression( 2547 exp.Clone(this=self._parse_table(schema=True), shallow=shallow, copy=copy) 2548 ) 2549 2550 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2551 return self._parse_as_command(start) 2552 2553 create_kind_text = create_token.text.upper() 2554 return self.expression( 2555 exp.Create( 2556 this=this, 2557 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2558 replace=replace, 2559 refresh=refresh, 2560 unique=unique, 2561 expression=expression, 2562 exists=exists, 2563 properties=properties, 2564 indexes=indexes, 2565 no_schema_binding=no_schema_binding, 2566 begin=begin, 2567 clone=clone, 2568 concurrently=concurrently, 2569 clustered=clustered, 2570 ) 2571 ) 2572 2573 def _parse_sequence_properties(self) -> exp.SequenceProperties | None: 2574 seq = exp.SequenceProperties() 2575 2576 options = [] 2577 index = self._index 2578 2579 while self._curr: 2580 self._match(TokenType.COMMA) 2581 if self._match_text_seq("INCREMENT"): 2582 self._match_text_seq("BY") 2583 self._match_text_seq("=") 2584 seq.set("increment", self._parse_term()) 2585 elif self._match_text_seq("MINVALUE"): 2586 seq.set("minvalue", self._parse_term()) 2587 elif self._match_text_seq("MAXVALUE"): 2588 seq.set("maxvalue", self._parse_term()) 2589 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2590 self._match_text_seq("=") 2591 seq.set("start", self._parse_term()) 2592 elif self._match_text_seq("CACHE"): 2593 # T-SQL allows empty CACHE which is initialized dynamically 2594 seq.set("cache", self._parse_number() or True) 2595 elif self._match_text_seq("OWNED", "BY"): 2596 # "OWNED BY NONE" is the default 2597 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2598 else: 2599 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2600 if opt: 2601 options.append(opt) 2602 else: 2603 break 2604 2605 seq.set("options", options if options else None) 2606 return None if self._index == index else seq 2607 2608 def _parse_trigger_events(self) -> list[exp.TriggerEvent]: 2609 events = [] 2610 2611 while True: 2612 event_type = self._match_set(self.TRIGGER_EVENTS) and self._prev.text.upper() 2613 2614 if not event_type: 2615 self.raise_error("Expected trigger event (INSERT, UPDATE, DELETE, TRUNCATE)") 2616 2617 columns = ( 2618 self._parse_csv(self._parse_column) 2619 if event_type == "UPDATE" and self._match_text_seq("OF") 2620 else None 2621 ) 2622 2623 events.append(self.expression(exp.TriggerEvent(this=event_type, columns=columns))) 2624 2625 if not self._match(TokenType.OR): 2626 break 2627 2628 return events 2629 2630 def _parse_trigger_deferrable( 2631 self, 2632 ) -> tuple[str | None, str | None]: 2633 deferrable_var = self._parse_var_from_options( 2634 self.TRIGGER_DEFERRABLE, raise_unmatched=False 2635 ) 2636 deferrable = deferrable_var.this if deferrable_var else None 2637 2638 initially = None 2639 if deferrable and self._match_text_seq("INITIALLY"): 2640 initially = ( 2641 self._prev.text.upper() if self._match_texts(("IMMEDIATE", "DEFERRED")) else None 2642 ) 2643 2644 return deferrable, initially 2645 2646 def _parse_trigger_referencing_clause(self, keyword: str) -> exp.Expr | None: 2647 if not self._match_text_seq(keyword): 2648 return None 2649 if not self._match_text_seq("TABLE"): 2650 self.raise_error(f"Expected TABLE after {keyword} in REFERENCING clause") 2651 self._match_text_seq("AS") 2652 return self._parse_id_var() 2653 2654 def _parse_trigger_referencing(self) -> exp.TriggerReferencing | None: 2655 if not self._match_text_seq("REFERENCING"): 2656 return None 2657 2658 old_alias = None 2659 new_alias = None 2660 2661 while True: 2662 if alias := self._parse_trigger_referencing_clause("OLD"): 2663 if old_alias is not None: 2664 self.raise_error("Duplicate OLD clause in REFERENCING") 2665 old_alias = alias 2666 elif alias := self._parse_trigger_referencing_clause("NEW"): 2667 if new_alias is not None: 2668 self.raise_error("Duplicate NEW clause in REFERENCING") 2669 new_alias = alias 2670 else: 2671 break 2672 2673 if old_alias is None and new_alias is None: 2674 self.raise_error("REFERENCING clause requires at least OLD TABLE or NEW TABLE") 2675 2676 return self.expression(exp.TriggerReferencing(old=old_alias, new=new_alias)) 2677 2678 def _parse_trigger_for_each(self) -> str | None: 2679 if not self._match_text_seq("FOR", "EACH"): 2680 return None 2681 2682 return self._prev.text.upper() if self._match_texts(("ROW", "STATEMENT")) else None 2683 2684 def _parse_trigger_execute(self) -> exp.TriggerExecute | None: 2685 if not self._match(TokenType.EXECUTE): 2686 return None 2687 2688 if not self._match_set((TokenType.FUNCTION, TokenType.PROCEDURE)): 2689 self.raise_error("Expected FUNCTION or PROCEDURE after EXECUTE") 2690 2691 func_call = self._parse_column() 2692 return self.expression(exp.TriggerExecute(this=func_call)) 2693 2694 def _parse_property_before(self) -> exp.Expr | list[exp.Expr] | None: 2695 # only used for teradata currently 2696 self._match(TokenType.COMMA) 2697 2698 kwargs = { 2699 "no": self._match_text_seq("NO"), 2700 "dual": self._match_text_seq("DUAL"), 2701 "before": self._match_text_seq("BEFORE"), 2702 "default": self._match_text_seq("DEFAULT"), 2703 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2704 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2705 "after": self._match_text_seq("AFTER"), 2706 "minimum": self._match_texts(("MIN", "MINIMUM")), 2707 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2708 } 2709 2710 if self._match_texts(self.PROPERTY_PARSERS): 2711 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2712 try: 2713 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2714 except TypeError: 2715 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2716 2717 return None 2718 2719 def _parse_wrapped_properties(self) -> list[exp.Expr | list[exp.Expr]]: 2720 return self._parse_wrapped_csv(self._parse_property) 2721 2722 def _parse_property(self) -> exp.Expr | list[exp.Expr] | None: 2723 if self._match_texts(self.PROPERTY_PARSERS): 2724 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2725 2726 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2727 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2728 2729 if self._match_text_seq("COMPOUND", "SORTKEY"): 2730 return self._parse_sortkey(compound=True) 2731 2732 if self._match_text_seq("PARAMETER", "STYLE", "PANDAS"): 2733 return self.expression(exp.ParameterStyleProperty(this="PANDAS")) 2734 2735 index = self._index 2736 2737 seq_props = self._parse_sequence_properties() 2738 if seq_props: 2739 return seq_props 2740 2741 self._retreat(index) 2742 key = self._parse_column() 2743 2744 if not self._match(TokenType.EQ): 2745 self._retreat(index) 2746 return None 2747 2748 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2749 if isinstance(key, exp.Column): 2750 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2751 2752 value = self._parse_bitwise() or self._parse_var(any_token=True) 2753 2754 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2755 if isinstance(value, exp.Column): 2756 value = exp.var(value.name) 2757 2758 return self.expression(exp.Property(this=key, value=value)) 2759 2760 def _parse_stored(self) -> exp.FileFormatProperty | exp.StorageHandlerProperty: 2761 if self._match_text_seq("BY"): 2762 return self.expression(exp.StorageHandlerProperty(this=self._parse_var_or_string())) 2763 2764 self._match(TokenType.ALIAS) 2765 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2766 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2767 2768 return self.expression( 2769 exp.FileFormatProperty( 2770 this=( 2771 self.expression( 2772 exp.InputOutputFormat( 2773 input_format=input_format, output_format=output_format 2774 ) 2775 ) 2776 if input_format or output_format 2777 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2778 ), 2779 hive_format=True, 2780 ) 2781 ) 2782 2783 def _parse_unquoted_field(self) -> exp.Expr | None: 2784 field = self._parse_field() 2785 if isinstance(field, exp.Identifier) and not field.quoted: 2786 field = exp.var(field) 2787 2788 return field 2789 2790 def _parse_property_assignment(self, exp_class: type[E], **kwargs: t.Any) -> E: 2791 self._match(TokenType.EQ) 2792 self._match(TokenType.ALIAS) 2793 2794 return self.expression(exp_class(this=self._parse_unquoted_field(), **kwargs)) 2795 2796 def _parse_properties(self, before: bool | None = None) -> exp.Properties | None: 2797 properties = [] 2798 while True: 2799 if before: 2800 prop = self._parse_property_before() 2801 else: 2802 prop = self._parse_property() 2803 if not prop: 2804 break 2805 for p in ensure_list(prop): 2806 properties.append(p) 2807 2808 if properties: 2809 return self.expression(exp.Properties(expressions=properties)) 2810 2811 return None 2812 2813 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2814 return self.expression( 2815 exp.FallbackProperty(no=no, protection=self._match_text_seq("PROTECTION")) 2816 ) 2817 2818 def _parse_sql_security(self) -> exp.SqlSecurityProperty: 2819 return self.expression( 2820 exp.SqlSecurityProperty( 2821 this=self._match_texts(self.SECURITY_PROPERTY_KEYWORDS) and self._prev.text.upper() 2822 ) 2823 ) 2824 2825 def _parse_settings_property(self) -> exp.SettingsProperty: 2826 return self.expression( 2827 exp.SettingsProperty(expressions=self._parse_csv(self._parse_assignment)) 2828 ) 2829 2830 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2831 if self._index >= 2: 2832 pre_volatile_token = self._tokens[self._index - 2] 2833 else: 2834 pre_volatile_token = None 2835 2836 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2837 return exp.VolatileProperty() 2838 2839 return self.expression(exp.StabilityProperty(this=exp.Literal.string("VOLATILE"))) 2840 2841 def _parse_retention_period(self) -> exp.Var: 2842 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2843 number = self._parse_number() 2844 number_str = f"{number} " if number else "" 2845 unit = self._parse_var(any_token=True) 2846 return exp.var(f"{number_str}{unit}") 2847 2848 def _parse_system_versioning_property( 2849 self, with_: bool = False 2850 ) -> exp.WithSystemVersioningProperty: 2851 self._match(TokenType.EQ) 2852 prop = self.expression(exp.WithSystemVersioningProperty(on=True, with_=with_)) 2853 2854 if self._match_text_seq("OFF"): 2855 prop.set("on", False) 2856 return prop 2857 2858 self._match(TokenType.ON) 2859 if self._match(TokenType.L_PAREN): 2860 while self._curr and not self._match(TokenType.R_PAREN): 2861 if self._match_text_seq("HISTORY_TABLE", "="): 2862 prop.set("this", self._parse_table_parts()) 2863 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2864 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2865 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2866 prop.set("retention_period", self._parse_retention_period()) 2867 2868 self._match(TokenType.COMMA) 2869 2870 return prop 2871 2872 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2873 self._match(TokenType.EQ) 2874 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2875 prop = self.expression(exp.DataDeletionProperty(on=on)) 2876 2877 if self._match(TokenType.L_PAREN): 2878 while self._curr and not self._match(TokenType.R_PAREN): 2879 if self._match_text_seq("FILTER_COLUMN", "="): 2880 prop.set("filter_column", self._parse_column()) 2881 elif self._match_text_seq("RETENTION_PERIOD", "="): 2882 prop.set("retention_period", self._parse_retention_period()) 2883 2884 self._match(TokenType.COMMA) 2885 2886 return prop 2887 2888 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2889 kind = "HASH" 2890 expressions: list[exp.Expr] | None = None 2891 if self._match_text_seq("BY", "HASH"): 2892 expressions = self._parse_wrapped_csv(self._parse_id_var) 2893 elif self._match_text_seq("BY", "RANDOM"): 2894 kind = "RANDOM" 2895 2896 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2897 buckets: exp.Expr | None = None 2898 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2899 buckets = self._parse_number() 2900 2901 return self.expression( 2902 exp.DistributedByProperty( 2903 expressions=expressions, kind=kind, buckets=buckets, order=self._parse_order() 2904 ) 2905 ) 2906 2907 def _parse_composite_key_property(self, expr_type: type[E]) -> E: 2908 self._match_text_seq("KEY") 2909 expressions = self._parse_wrapped_id_vars() 2910 return self.expression(expr_type(expressions=expressions)) 2911 2912 def _parse_with_property(self) -> exp.Expr | None | list[exp.Expr]: 2913 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2914 prop = self._parse_system_versioning_property(with_=True) 2915 self._match_r_paren() 2916 return prop 2917 2918 if self._match(TokenType.L_PAREN, advance=False): 2919 result: list[exp.Expr] = [] 2920 for i in self._parse_wrapped_properties(): 2921 result.extend(i) if isinstance(i, list) else result.append(i) 2922 return result 2923 2924 if self._match_text_seq("JOURNAL"): 2925 return self._parse_withjournaltable() 2926 2927 if self._match_texts(self.VIEW_ATTRIBUTES): 2928 return self.expression(exp.ViewAttributeProperty(this=self._prev.text.upper())) 2929 2930 if self._match_text_seq("DATA"): 2931 return self._parse_withdata(no=False) 2932 elif self._match_text_seq("NO", "DATA"): 2933 return self._parse_withdata(no=True) 2934 2935 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2936 return self._parse_serde_properties(with_=True) 2937 2938 if self._match(TokenType.SCHEMA): 2939 return self.expression( 2940 exp.WithSchemaBindingProperty( 2941 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS) 2942 ) 2943 ) 2944 2945 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2946 return self.expression( 2947 exp.WithProcedureOptions(expressions=self._parse_csv(self._parse_procedure_option)) 2948 ) 2949 2950 if not self._next: 2951 return None 2952 2953 return self._parse_withisolatedloading() 2954 2955 def _parse_procedure_option(self) -> exp.Expr | None: 2956 if self._match_text_seq("EXECUTE", "AS"): 2957 return self.expression( 2958 exp.ExecuteAsProperty( 2959 this=self._parse_var_from_options( 2960 self.EXECUTE_AS_OPTIONS, raise_unmatched=False 2961 ) 2962 or self._parse_string() 2963 ) 2964 ) 2965 2966 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2967 2968 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2969 def _parse_definer(self) -> exp.DefinerProperty | None: 2970 self._match(TokenType.EQ) 2971 2972 user = self._parse_id_var() 2973 self._match(TokenType.PARAMETER) 2974 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2975 2976 if not user or not host: 2977 return None 2978 2979 return exp.DefinerProperty(this=f"{user}@{host}") 2980 2981 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2982 self._match(TokenType.TABLE) 2983 self._match(TokenType.EQ) 2984 return self.expression(exp.WithJournalTableProperty(this=self._parse_table_parts())) 2985 2986 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2987 return self.expression(exp.LogProperty(no=no)) 2988 2989 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2990 return self.expression(exp.JournalProperty(**kwargs)) 2991 2992 def _parse_checksum(self) -> exp.ChecksumProperty: 2993 self._match(TokenType.EQ) 2994 2995 on = None 2996 if self._match(TokenType.ON): 2997 on = True 2998 elif self._match_text_seq("OFF"): 2999 on = False 3000 3001 return self.expression(exp.ChecksumProperty(on=on, default=self._match(TokenType.DEFAULT))) 3002 3003 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 3004 return self.expression( 3005 exp.Cluster( 3006 expressions=( 3007 self._parse_wrapped_csv(self._parse_ordered) 3008 if wrapped 3009 else self._parse_csv(self._parse_ordered) 3010 ) 3011 ) 3012 ) 3013 3014 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 3015 self._match_text_seq("BY") 3016 3017 self._match_l_paren() 3018 expressions = self._parse_csv(self._parse_column) 3019 self._match_r_paren() 3020 3021 if self._match_text_seq("SORTED", "BY"): 3022 self._match_l_paren() 3023 sorted_by = self._parse_csv(self._parse_ordered) 3024 self._match_r_paren() 3025 else: 3026 sorted_by = None 3027 3028 self._match(TokenType.INTO) 3029 buckets = self._parse_number() 3030 self._match_text_seq("BUCKETS") 3031 3032 return self.expression( 3033 exp.ClusteredByProperty(expressions=expressions, sorted_by=sorted_by, buckets=buckets) 3034 ) 3035 3036 def _parse_copy_property(self) -> exp.CopyGrantsProperty | None: 3037 if not self._match_text_seq("GRANTS"): 3038 self._retreat(self._index - 1) 3039 return None 3040 3041 return self.expression(exp.CopyGrantsProperty()) 3042 3043 def _parse_freespace(self) -> exp.FreespaceProperty: 3044 self._match(TokenType.EQ) 3045 return self.expression( 3046 exp.FreespaceProperty(this=self._parse_number(), percent=self._match(TokenType.PERCENT)) 3047 ) 3048 3049 def _parse_mergeblockratio( 3050 self, no: bool = False, default: bool = False 3051 ) -> exp.MergeBlockRatioProperty: 3052 if self._match(TokenType.EQ): 3053 return self.expression( 3054 exp.MergeBlockRatioProperty( 3055 this=self._parse_number(), percent=self._match(TokenType.PERCENT) 3056 ) 3057 ) 3058 3059 return self.expression(exp.MergeBlockRatioProperty(no=no, default=default)) 3060 3061 def _parse_datablocksize( 3062 self, 3063 default: bool | None = None, 3064 minimum: bool | None = None, 3065 maximum: bool | None = None, 3066 ) -> exp.DataBlocksizeProperty: 3067 self._match(TokenType.EQ) 3068 size = self._parse_number() 3069 3070 units = None 3071 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 3072 units = self._prev.text 3073 3074 return self.expression( 3075 exp.DataBlocksizeProperty( 3076 size=size, units=units, default=default, minimum=minimum, maximum=maximum 3077 ) 3078 ) 3079 3080 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 3081 self._match(TokenType.EQ) 3082 always = self._match_text_seq("ALWAYS") 3083 manual = self._match_text_seq("MANUAL") 3084 never = self._match_text_seq("NEVER") 3085 default = self._match_text_seq("DEFAULT") 3086 3087 autotemp = None 3088 if self._match_text_seq("AUTOTEMP"): 3089 autotemp = self._parse_schema() 3090 3091 return self.expression( 3092 exp.BlockCompressionProperty( 3093 always=always, manual=manual, never=never, default=default, autotemp=autotemp 3094 ) 3095 ) 3096 3097 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty | None: 3098 index = self._index 3099 no = self._match_text_seq("NO") 3100 concurrent = self._match_text_seq("CONCURRENT") 3101 3102 if not self._match_text_seq("ISOLATED", "LOADING"): 3103 self._retreat(index) 3104 return None 3105 3106 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 3107 return self.expression( 3108 exp.IsolatedLoadingProperty(no=no, concurrent=concurrent, target=target) 3109 ) 3110 3111 def _parse_locking(self) -> exp.LockingProperty: 3112 if self._match(TokenType.TABLE): 3113 kind = "TABLE" 3114 elif self._match(TokenType.VIEW): 3115 kind = "VIEW" 3116 elif self._match(TokenType.ROW): 3117 kind = "ROW" 3118 elif self._match_text_seq("DATABASE"): 3119 kind = "DATABASE" 3120 else: 3121 kind = None 3122 3123 if kind in ("DATABASE", "TABLE", "VIEW"): 3124 this = self._parse_table_parts() 3125 else: 3126 this = None 3127 3128 if self._match(TokenType.FOR): 3129 for_or_in = "FOR" 3130 elif self._match(TokenType.IN): 3131 for_or_in = "IN" 3132 else: 3133 for_or_in = None 3134 3135 if self._match_text_seq("ACCESS"): 3136 lock_type = "ACCESS" 3137 elif self._match_texts(("EXCL", "EXCLUSIVE")): 3138 lock_type = "EXCLUSIVE" 3139 elif self._match_text_seq("SHARE"): 3140 lock_type = "SHARE" 3141 elif self._match_text_seq("READ"): 3142 lock_type = "READ" 3143 elif self._match_text_seq("WRITE"): 3144 lock_type = "WRITE" 3145 elif self._match_text_seq("CHECKSUM"): 3146 lock_type = "CHECKSUM" 3147 else: 3148 lock_type = None 3149 3150 override = self._match_text_seq("OVERRIDE") 3151 3152 return self.expression( 3153 exp.LockingProperty( 3154 this=this, kind=kind, for_or_in=for_or_in, lock_type=lock_type, override=override 3155 ) 3156 ) 3157 3158 def _parse_partition_by(self) -> list[exp.Expr]: 3159 if self._match(TokenType.PARTITION_BY): 3160 return self._parse_csv(self._parse_disjunction) 3161 return [] 3162 3163 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 3164 def _parse_partition_bound_expr() -> exp.Expr | None: 3165 if self._match_text_seq("MINVALUE"): 3166 return exp.var("MINVALUE") 3167 if self._match_text_seq("MAXVALUE"): 3168 return exp.var("MAXVALUE") 3169 return self._parse_bitwise() 3170 3171 this: exp.Expr | list[exp.Expr] | None = None 3172 expression = None 3173 from_expressions = None 3174 to_expressions = None 3175 3176 if self._match(TokenType.IN): 3177 this = self._parse_wrapped_csv(self._parse_bitwise) 3178 elif self._match(TokenType.FROM): 3179 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 3180 self._match_text_seq("TO") 3181 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 3182 elif self._match_text_seq("WITH", "(", "MODULUS"): 3183 this = self._parse_number() 3184 self._match_text_seq(",", "REMAINDER") 3185 expression = self._parse_number() 3186 self._match_r_paren() 3187 else: 3188 self.raise_error("Failed to parse partition bound spec.") 3189 3190 return self.expression( 3191 exp.PartitionBoundSpec( 3192 this=this, 3193 expression=expression, 3194 from_expressions=from_expressions, 3195 to_expressions=to_expressions, 3196 ) 3197 ) 3198 3199 # https://www.postgresql.org/docs/current/sql-createtable.html 3200 def _parse_partitioned_of(self) -> exp.PartitionedOfProperty | None: 3201 if not self._match_text_seq("OF"): 3202 self._retreat(self._index - 1) 3203 return None 3204 3205 this = self._parse_table(schema=True) 3206 3207 if self._match(TokenType.DEFAULT): 3208 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 3209 elif self._match_text_seq("FOR", "VALUES"): 3210 expression = self._parse_partition_bound_spec() 3211 else: 3212 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 3213 3214 return self.expression(exp.PartitionedOfProperty(this=this, expression=expression)) 3215 3216 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 3217 self._match(TokenType.EQ) 3218 return self.expression( 3219 exp.PartitionedByProperty( 3220 this=self._parse_schema() or self._parse_bracket(self._parse_field()) 3221 ) 3222 ) 3223 3224 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 3225 if self._match_text_seq("AND", "STATISTICS"): 3226 statistics = True 3227 elif self._match_text_seq("AND", "NO", "STATISTICS"): 3228 statistics = False 3229 else: 3230 statistics = None 3231 3232 return self.expression(exp.WithDataProperty(no=no, statistics=statistics)) 3233 3234 def _parse_contains_property(self) -> exp.SqlReadWriteProperty | None: 3235 if self._match_text_seq("SQL"): 3236 return self.expression(exp.SqlReadWriteProperty(this="CONTAINS SQL")) 3237 return None 3238 3239 def _parse_modifies_property(self) -> exp.SqlReadWriteProperty | None: 3240 if self._match_text_seq("SQL", "DATA"): 3241 return self.expression(exp.SqlReadWriteProperty(this="MODIFIES SQL DATA")) 3242 return None 3243 3244 def _parse_no_property(self) -> exp.Expr | None: 3245 if self._match_text_seq("PRIMARY", "INDEX"): 3246 return exp.NoPrimaryIndexProperty() 3247 if self._match_text_seq("SQL"): 3248 return self.expression(exp.SqlReadWriteProperty(this="NO SQL")) 3249 return None 3250 3251 def _parse_on_property(self) -> exp.Expr | None: 3252 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 3253 return exp.OnCommitProperty() 3254 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 3255 return exp.OnCommitProperty(delete=True) 3256 return self.expression(exp.OnProperty(this=self._parse_schema(self._parse_id_var()))) 3257 3258 def _parse_reads_property(self) -> exp.SqlReadWriteProperty | None: 3259 if self._match_text_seq("SQL", "DATA"): 3260 return self.expression(exp.SqlReadWriteProperty(this="READS SQL DATA")) 3261 return None 3262 3263 def _parse_distkey(self) -> exp.DistKeyProperty: 3264 return self.expression(exp.DistKeyProperty(this=self._parse_wrapped(self._parse_id_var))) 3265 3266 def _parse_create_like(self) -> exp.LikeProperty | None: 3267 table = self._parse_table(schema=True) 3268 3269 options = [] 3270 while self._match_texts(("INCLUDING", "EXCLUDING")): 3271 this = self._prev.text.upper() 3272 3273 id_var = self._parse_id_var() 3274 if not id_var: 3275 return None 3276 3277 options.append( 3278 self.expression(exp.Property(this=this, value=exp.var(id_var.this.upper()))) 3279 ) 3280 3281 return self.expression(exp.LikeProperty(this=table, expressions=options)) 3282 3283 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 3284 return self.expression( 3285 exp.SortKeyProperty(this=self._parse_wrapped_id_vars(), compound=compound) 3286 ) 3287 3288 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 3289 self._match(TokenType.EQ) 3290 return self.expression( 3291 exp.CharacterSetProperty(this=self._parse_var_or_string(), default=default) 3292 ) 3293 3294 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 3295 self._match_text_seq("WITH", "CONNECTION") 3296 return self.expression( 3297 exp.RemoteWithConnectionModelProperty(this=self._parse_table_parts()) 3298 ) 3299 3300 def _parse_returns(self) -> exp.ReturnsProperty: 3301 value: exp.Expr | None 3302 null = None 3303 is_table = self._match(TokenType.TABLE) 3304 3305 if is_table: 3306 if self._match(TokenType.LT): 3307 value = self.expression( 3308 exp.Schema(this="TABLE", expressions=self._parse_csv(self._parse_struct_types)) 3309 ) 3310 if not self._match(TokenType.GT): 3311 self.raise_error("Expecting >") 3312 else: 3313 value = self._parse_schema(exp.var("TABLE")) 3314 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 3315 null = True 3316 value = None 3317 else: 3318 value = self._parse_types() 3319 3320 return self.expression(exp.ReturnsProperty(this=value, is_table=is_table, null=null)) 3321 3322 def _parse_describe(self) -> exp.Describe: 3323 kind = self._prev.text if self._match_set(self.CREATABLES) else None 3324 style: str | None = ( 3325 self._prev.text.upper() if self._match_texts(self.DESCRIBE_STYLES) else None 3326 ) 3327 if self._match(TokenType.DOT): 3328 style = None 3329 self._retreat(self._index - 2) 3330 3331 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 3332 3333 if self._match_set(self.STATEMENT_PARSERS, advance=False): 3334 this = self._parse_statement() 3335 else: 3336 this = self._parse_table(schema=True) 3337 3338 properties = self._parse_properties() 3339 expressions = properties.expressions if properties else None 3340 partition = self._parse_partition() 3341 return self.expression( 3342 exp.Describe( 3343 this=this, 3344 style=style, 3345 kind=kind, 3346 expressions=expressions, 3347 partition=partition, 3348 format=format, 3349 as_json=self._match_text_seq("AS", "JSON"), 3350 ) 3351 ) 3352 3353 def _parse_multitable_inserts(self, comments: list[str] | None) -> exp.MultitableInserts: 3354 kind = self._prev.text.upper() 3355 expressions = [] 3356 3357 def parse_conditional_insert() -> exp.ConditionalInsert | None: 3358 if self._match(TokenType.WHEN): 3359 expression = self._parse_disjunction() 3360 self._match(TokenType.THEN) 3361 else: 3362 expression = None 3363 3364 else_ = self._match(TokenType.ELSE) 3365 3366 if not self._match(TokenType.INTO): 3367 return None 3368 3369 return self.expression( 3370 exp.ConditionalInsert( 3371 this=self.expression( 3372 exp.Insert( 3373 this=self._parse_table(schema=True), 3374 expression=self._parse_derived_table_values(), 3375 ) 3376 ), 3377 expression=expression, 3378 else_=else_, 3379 ) 3380 ) 3381 3382 expression = parse_conditional_insert() 3383 while expression is not None: 3384 expressions.append(expression) 3385 expression = parse_conditional_insert() 3386 3387 return self.expression( 3388 exp.MultitableInserts(kind=kind, expressions=expressions, source=self._parse_table()), 3389 comments=comments, 3390 ) 3391 3392 def _parse_insert(self) -> exp.Insert | exp.MultitableInserts: 3393 comments: list[str] = [] 3394 hint = self._parse_hint() 3395 overwrite = self._match(TokenType.OVERWRITE) 3396 ignore = self._match(TokenType.IGNORE) 3397 local = self._match_text_seq("LOCAL") 3398 alternative = None 3399 is_function = None 3400 3401 if self._match_text_seq("DIRECTORY"): 3402 this: exp.Expr | None = self.expression( 3403 exp.Directory( 3404 this=self._parse_var_or_string(), 3405 local=local, 3406 row_format=self._parse_row_format(match_row=True), 3407 ) 3408 ) 3409 else: 3410 if self._match_set((TokenType.FIRST, TokenType.ALL)): 3411 comments += ensure_list(self._prev_comments) 3412 return self._parse_multitable_inserts(comments) 3413 3414 if self._match(TokenType.OR): 3415 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 3416 3417 self._match(TokenType.INTO) 3418 comments += ensure_list(self._prev_comments) 3419 self._match(TokenType.TABLE) 3420 is_function = self._match(TokenType.FUNCTION) 3421 3422 this = self._parse_function() if is_function else self._parse_insert_table() 3423 3424 returning = self._parse_returning() # TSQL allows RETURNING before source 3425 3426 return self.expression( 3427 exp.Insert( 3428 hint=hint, 3429 is_function=is_function, 3430 this=this, 3431 stored=self._match_text_seq("STORED") and self._parse_stored(), 3432 by_name=self._match_text_seq("BY", "NAME"), 3433 exists=self._parse_exists(), 3434 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 3435 and self._parse_disjunction(), 3436 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 3437 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 3438 default=self._match_text_seq("DEFAULT", "VALUES"), 3439 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 3440 conflict=self._parse_on_conflict(), 3441 returning=returning or self._parse_returning(), 3442 overwrite=overwrite, 3443 alternative=alternative, 3444 ignore=ignore, 3445 source=self._match(TokenType.TABLE) and self._parse_table(), 3446 ), 3447 comments=comments, 3448 ) 3449 3450 def _parse_insert_table(self) -> exp.Expr | None: 3451 this = self._parse_table(schema=True, parse_partition=True) 3452 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 3453 this.set("alias", self._parse_table_alias()) 3454 return this 3455 3456 def _parse_kill(self) -> exp.Kill: 3457 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 3458 3459 return self.expression(exp.Kill(this=self._parse_primary(), kind=kind)) 3460 3461 def _parse_on_conflict(self) -> exp.OnConflict | None: 3462 conflict = self._match_text_seq("ON", "CONFLICT") 3463 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 3464 3465 if not conflict and not duplicate: 3466 return None 3467 3468 conflict_keys = None 3469 constraint = None 3470 3471 if conflict: 3472 if self._match_text_seq("ON", "CONSTRAINT"): 3473 constraint = self._parse_id_var() 3474 elif self._match(TokenType.L_PAREN): 3475 conflict_keys = self._parse_csv(self._parse_id_var) 3476 self._match_r_paren() 3477 3478 index_predicate = self._parse_where() 3479 3480 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 3481 if self._prev.token_type == TokenType.UPDATE: 3482 self._match(TokenType.SET) 3483 expressions = self._parse_csv(self._parse_equality) 3484 else: 3485 expressions = None 3486 3487 return self.expression( 3488 exp.OnConflict( 3489 duplicate=duplicate, 3490 expressions=expressions, 3491 action=action, 3492 conflict_keys=conflict_keys, 3493 index_predicate=index_predicate, 3494 constraint=constraint, 3495 where=self._parse_where(), 3496 ) 3497 ) 3498 3499 def _parse_returning(self) -> exp.Returning | None: 3500 if not self._match(TokenType.RETURNING): 3501 return None 3502 return self.expression( 3503 exp.Returning( 3504 expressions=self._parse_csv(self._parse_expression), 3505 into=self._match(TokenType.INTO) and self._parse_table_part(), 3506 ) 3507 ) 3508 3509 def _parse_row(self) -> exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty | None: 3510 if not self._match(TokenType.FORMAT): 3511 return None 3512 return self._parse_row_format() 3513 3514 def _parse_serde_properties(self, with_: bool = False) -> exp.SerdeProperties | None: 3515 index = self._index 3516 with_ = with_ or self._match_text_seq("WITH") 3517 3518 if not self._match(TokenType.SERDE_PROPERTIES): 3519 self._retreat(index) 3520 return None 3521 return self.expression( 3522 exp.SerdeProperties(expressions=self._parse_wrapped_properties(), with_=with_) 3523 ) 3524 3525 def _parse_row_format( 3526 self, match_row: bool = False 3527 ) -> exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty | None: 3528 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3529 return None 3530 3531 if self._match_text_seq("SERDE"): 3532 this = self._parse_string() 3533 3534 serde_properties = self._parse_serde_properties() 3535 3536 return self.expression( 3537 exp.RowFormatSerdeProperty(this=this, serde_properties=serde_properties) 3538 ) 3539 3540 self._match_text_seq("DELIMITED") 3541 3542 kwargs = {} 3543 3544 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3545 kwargs["fields"] = self._parse_string() 3546 if self._match_text_seq("ESCAPED", "BY"): 3547 kwargs["escaped"] = self._parse_string() 3548 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3549 kwargs["collection_items"] = self._parse_string() 3550 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3551 kwargs["map_keys"] = self._parse_string() 3552 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3553 kwargs["lines"] = self._parse_string() 3554 if self._match_text_seq("NULL", "DEFINED", "AS"): 3555 kwargs["null"] = self._parse_string() 3556 3557 return self.expression(exp.RowFormatDelimitedProperty(**kwargs)) # type: ignore 3558 3559 def _parse_load(self) -> exp.LoadData | exp.Command: 3560 if self._match_text_seq("DATA"): 3561 local = self._match_text_seq("LOCAL") 3562 self._match_text_seq("INPATH") 3563 inpath = self._parse_string() 3564 overwrite = self._match(TokenType.OVERWRITE) 3565 self._match_pair(TokenType.INTO, TokenType.TABLE) 3566 3567 return self.expression( 3568 exp.LoadData( 3569 this=self._parse_table(schema=True), 3570 local=local, 3571 overwrite=overwrite, 3572 inpath=inpath, 3573 files=self._match_text_seq("FROM", "FILES") 3574 and exp.Properties(expressions=self._parse_wrapped_properties()), 3575 partition=self._parse_partition(), 3576 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3577 serde=self._match_text_seq("SERDE") and self._parse_string(), 3578 ) 3579 ) 3580 return self._parse_as_command(self._prev) 3581 3582 def _parse_delete(self) -> exp.Delete: 3583 hint = self._parse_hint() 3584 3585 # This handles MySQL's "Multiple-Table Syntax" 3586 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3587 tables = None 3588 if not self._match(TokenType.FROM, advance=False): 3589 tables = self._parse_csv(self._parse_table) or None 3590 3591 returning = self._parse_returning() 3592 3593 return self.expression( 3594 exp.Delete( 3595 hint=hint, 3596 tables=tables, 3597 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3598 using=self._match(TokenType.USING) 3599 and self._parse_csv(lambda: self._parse_table(joins=True)), 3600 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3601 where=self._parse_where(), 3602 returning=returning or self._parse_returning(), 3603 order=self._parse_order(), 3604 limit=self._parse_limit(), 3605 ) 3606 ) 3607 3608 def _parse_update(self) -> exp.Update: 3609 hint = self._parse_hint() 3610 kwargs: dict[str, object] = { 3611 "hint": hint, 3612 "this": self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS), 3613 } 3614 while self._curr: 3615 if self._match(TokenType.SET): 3616 kwargs["expressions"] = self._parse_csv(self._parse_equality) 3617 elif self._match(TokenType.RETURNING, advance=False): 3618 kwargs["returning"] = self._parse_returning() 3619 elif self._match(TokenType.FROM, advance=False): 3620 from_ = self._parse_from(joins=True) 3621 table = from_.this if from_ else None 3622 if isinstance(table, exp.Subquery) and self._match(TokenType.JOIN, advance=False): 3623 table.set("joins", list(self._parse_joins()) or None) 3624 3625 kwargs["from_"] = from_ 3626 elif self._match(TokenType.WHERE, advance=False): 3627 kwargs["where"] = self._parse_where() 3628 elif self._match(TokenType.ORDER_BY, advance=False): 3629 kwargs["order"] = self._parse_order() 3630 elif self._match(TokenType.LIMIT, advance=False): 3631 kwargs["limit"] = self._parse_limit() 3632 else: 3633 break 3634 3635 return self.expression(exp.Update(**kwargs)) 3636 3637 def _parse_use(self) -> exp.Use: 3638 return self.expression( 3639 exp.Use( 3640 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3641 this=self._parse_table(schema=False), 3642 ) 3643 ) 3644 3645 def _parse_uncache(self) -> exp.Uncache: 3646 if not self._match(TokenType.TABLE): 3647 self.raise_error("Expecting TABLE after UNCACHE") 3648 3649 return self.expression( 3650 exp.Uncache(exists=self._parse_exists(), this=self._parse_table(schema=True)) 3651 ) 3652 3653 def _parse_cache(self) -> exp.Cache: 3654 lazy = self._match_text_seq("LAZY") 3655 self._match(TokenType.TABLE) 3656 table = self._parse_table(schema=True) 3657 3658 options = [] 3659 if self._match_text_seq("OPTIONS"): 3660 self._match_l_paren() 3661 k = self._parse_string() 3662 self._match(TokenType.EQ) 3663 v = self._parse_string() 3664 options = [k, v] 3665 self._match_r_paren() 3666 3667 self._match(TokenType.ALIAS) 3668 return self.expression( 3669 exp.Cache( 3670 this=table, lazy=lazy, options=options, expression=self._parse_select(nested=True) 3671 ) 3672 ) 3673 3674 def _parse_partition(self) -> exp.Partition | None: 3675 if not self._match_texts(self.PARTITION_KEYWORDS): 3676 return None 3677 3678 return self.expression( 3679 exp.Partition( 3680 subpartition=self._prev.text.upper() == "SUBPARTITION", 3681 expressions=self._parse_wrapped_csv(self._parse_disjunction), 3682 ) 3683 ) 3684 3685 def _parse_value(self, values: bool = True) -> exp.Tuple | None: 3686 def _parse_value_expression() -> exp.Expr | None: 3687 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3688 return exp.var(self._prev.text.upper()) 3689 return self._parse_expression() 3690 3691 if self._match(TokenType.L_PAREN): 3692 expressions = self._parse_csv(_parse_value_expression) 3693 self._match_r_paren() 3694 return self.expression(exp.Tuple(expressions=expressions)) 3695 3696 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3697 expression = self._parse_expression() 3698 if expression: 3699 return self.expression(exp.Tuple(expressions=[expression])) 3700 return None 3701 3702 def _parse_projections( 3703 self, 3704 ) -> tuple[list[exp.Expr], list[exp.Expr] | None]: 3705 return self._parse_expressions(), None 3706 3707 def _parse_wrapped_select(self, table: bool = False) -> exp.Expr | None: 3708 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3709 this: exp.Expr | None = self._parse_simplified_pivot( 3710 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3711 ) 3712 elif self._match(TokenType.FROM): 3713 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3714 # Support parentheses for duckdb FROM-first syntax 3715 select = self._parse_select(from_=from_) 3716 if select: 3717 if not select.args.get("from_"): 3718 select.set("from_", from_) 3719 this = select 3720 else: 3721 this = exp.select("*").from_(t.cast(exp.From, from_)) 3722 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3723 else: 3724 this = ( 3725 self._parse_table(consume_pipe=True) 3726 if table 3727 else self._parse_select(nested=True, parse_set_operation=False) 3728 ) 3729 3730 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3731 # in case a modifier (e.g. join) is following 3732 if table and isinstance(this, exp.Values) and this.alias: 3733 alias = this.args["alias"].pop() 3734 this = exp.Table(this=this, alias=alias) 3735 3736 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3737 3738 return this 3739 3740 def _parse_select( 3741 self, 3742 nested: bool = False, 3743 table: bool = False, 3744 parse_subquery_alias: bool = True, 3745 parse_set_operation: bool = True, 3746 consume_pipe: bool = True, 3747 from_: exp.From | None = None, 3748 ) -> exp.Expr | None: 3749 query = self._parse_select_query( 3750 nested=nested, 3751 table=table, 3752 parse_subquery_alias=parse_subquery_alias, 3753 parse_set_operation=parse_set_operation, 3754 ) 3755 3756 if consume_pipe and self._match(TokenType.PIPE_GT, advance=False): 3757 if not query and from_: 3758 query = exp.select("*").from_(from_) 3759 if isinstance(query, exp.Query): 3760 query = self._parse_pipe_syntax_query(query) 3761 query = query.subquery(copy=False) if query and table else query 3762 3763 return query 3764 3765 def _parse_select_query( 3766 self, 3767 nested: bool = False, 3768 table: bool = False, 3769 parse_subquery_alias: bool = True, 3770 parse_set_operation: bool = True, 3771 ) -> exp.Expr | None: 3772 cte = self._parse_with() 3773 3774 if cte: 3775 this = self._parse_statement() 3776 3777 if not this: 3778 self.raise_error("Failed to parse any statement following CTE") 3779 return cte 3780 3781 while isinstance(this, exp.Subquery) and this.is_wrapper: 3782 this = this.this 3783 3784 assert this is not None 3785 if "with_" in this.arg_types: 3786 this.set("with_", cte) 3787 else: 3788 self.raise_error(f"{this.key} does not support CTE") 3789 this = cte 3790 3791 return this 3792 3793 # duckdb supports leading with FROM x 3794 from_ = ( 3795 self._parse_from(joins=True, consume_pipe=True) 3796 if self._match(TokenType.FROM, advance=False) 3797 else None 3798 ) 3799 3800 if self._match(TokenType.SELECT): 3801 comments = self._prev_comments 3802 3803 hint = self._parse_hint() 3804 3805 if self._next and not self._next.token_type == TokenType.DOT: 3806 all_ = self._match(TokenType.ALL) 3807 matched_distinct = self._match_set(self.DISTINCT_TOKENS) 3808 else: 3809 all_, matched_distinct = None, False 3810 3811 kind = ( 3812 self._prev.text.upper() 3813 if self._match(TokenType.ALIAS) and self._match_texts(("STRUCT", "VALUE")) 3814 else None 3815 ) 3816 3817 distinct: exp.Expr | None = ( 3818 self.expression( 3819 exp.Distinct( 3820 on=self._parse_value(values=False) if self._match(TokenType.ON) else None 3821 ) 3822 ) 3823 if matched_distinct 3824 else None 3825 ) 3826 3827 if all_ and distinct: 3828 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3829 3830 operation_modifiers = [] 3831 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3832 operation_modifiers.append(exp.var(self._prev.text.upper())) 3833 3834 limit = self._parse_limit(top=True) 3835 projections, exclude = self._parse_projections() 3836 3837 this = self.expression( 3838 exp.Select( 3839 kind=kind, 3840 hint=hint, 3841 distinct=distinct, 3842 expressions=projections, 3843 limit=limit, 3844 exclude=exclude, 3845 operation_modifiers=operation_modifiers or None, 3846 ) 3847 ) 3848 this.comments = comments 3849 3850 into = self._parse_into() 3851 if into: 3852 this.set("into", into) 3853 3854 if not from_: 3855 from_ = self._parse_from() 3856 3857 if from_: 3858 this.set("from_", from_) 3859 3860 this = self._parse_query_modifiers(this) 3861 elif (table or nested) and self._match(TokenType.L_PAREN): 3862 comments = self._prev_comments 3863 this = self._parse_wrapped_select(table=table) 3864 3865 if this: 3866 this.add_comments(comments, prepend=True) 3867 3868 # We return early here so that the UNION isn't attached to the subquery by the 3869 # following call to _parse_set_operations, but instead becomes the parent node 3870 self._match_r_paren() 3871 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3872 elif self._match(TokenType.VALUES, advance=False): 3873 this = self._parse_derived_table_values() 3874 elif from_: 3875 this = exp.select("*").from_(from_.this, copy=False) 3876 this = self._parse_query_modifiers(this) 3877 elif self._match(TokenType.SUMMARIZE): 3878 table = self._match(TokenType.TABLE) 3879 this = self._parse_select() or self._parse_string() or self._parse_table() 3880 return self.expression(exp.Summarize(this=this, table=table)) 3881 elif self._match(TokenType.DESCRIBE): 3882 this = self._parse_describe() 3883 else: 3884 this = None 3885 3886 return self._parse_set_operations(this) if parse_set_operation else this 3887 3888 def _parse_recursive_with_search(self) -> exp.RecursiveWithSearch | None: 3889 self._match_text_seq("SEARCH") 3890 3891 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3892 3893 if not kind: 3894 return None 3895 3896 self._match_text_seq("FIRST", "BY") 3897 3898 return self.expression( 3899 exp.RecursiveWithSearch( 3900 kind=kind, 3901 this=self._parse_id_var(), 3902 expression=self._match_text_seq("SET") and self._parse_id_var(), 3903 using=self._match_text_seq("USING") and self._parse_id_var(), 3904 ) 3905 ) 3906 3907 def _parse_with(self, skip_with_token: bool = False) -> exp.With | None: 3908 if not skip_with_token and not self._match(TokenType.WITH): 3909 return None 3910 3911 comments = self._prev_comments 3912 recursive = self._match(TokenType.RECURSIVE) 3913 3914 last_comments = None 3915 expressions = [] 3916 while True: 3917 cte = self._parse_cte() 3918 if isinstance(cte, exp.CTE): 3919 expressions.append(cte) 3920 if last_comments: 3921 cte.add_comments(last_comments) 3922 3923 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3924 break 3925 else: 3926 self._match(TokenType.WITH) 3927 3928 last_comments = self._prev_comments 3929 3930 return self.expression( 3931 exp.With( 3932 expressions=expressions, 3933 recursive=recursive or None, 3934 search=self._parse_recursive_with_search(), 3935 ), 3936 comments=comments, 3937 ) 3938 3939 def _parse_cte(self) -> exp.CTE | None: 3940 index = self._index 3941 3942 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3943 if not alias or not alias.this: 3944 self.raise_error("Expected CTE to have alias") 3945 3946 key_expressions = ( 3947 self._parse_wrapped_id_vars() if self._match_text_seq("USING", "KEY") else None 3948 ) 3949 3950 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3951 self._retreat(index) 3952 return None 3953 3954 comments = self._prev_comments 3955 3956 if self._match_text_seq("NOT", "MATERIALIZED"): 3957 materialized = False 3958 elif self._match_text_seq("MATERIALIZED"): 3959 materialized = True 3960 else: 3961 materialized = None 3962 3963 cte = self.expression( 3964 exp.CTE( 3965 this=self._parse_wrapped(self._parse_statement), 3966 alias=alias, 3967 materialized=materialized, 3968 key_expressions=key_expressions, 3969 ), 3970 comments=comments, 3971 ) 3972 3973 values = cte.this 3974 if isinstance(values, exp.Values): 3975 if values.alias: 3976 cte.set("this", exp.select("*").from_(values)) 3977 else: 3978 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3979 3980 return cte 3981 3982 def _parse_table_alias( 3983 self, alias_tokens: t.Collection[TokenType] | None = None 3984 ) -> exp.TableAlias | None: 3985 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3986 # so this section tries to parse the clause version and if it fails, it treats the token 3987 # as an identifier (alias) 3988 if self._can_parse_limit_or_offset(): 3989 return None 3990 3991 any_token = self._match(TokenType.ALIAS) 3992 alias = ( 3993 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3994 or self._parse_string_as_identifier() 3995 ) 3996 3997 index = self._index 3998 if self._match(TokenType.L_PAREN): 3999 columns = self._parse_csv(self._parse_function_parameter) 4000 self._match_r_paren() if columns else self._retreat(index) 4001 else: 4002 columns = None 4003 4004 if not alias and not columns: 4005 return None 4006 4007 table_alias = self.expression(exp.TableAlias(this=alias, columns=columns)) 4008 4009 # We bubble up comments from the Identifier to the TableAlias 4010 if isinstance(alias, exp.Identifier): 4011 table_alias.add_comments(alias.pop_comments()) 4012 4013 return table_alias 4014 4015 def _parse_subquery( 4016 self, this: exp.Expr | None, parse_alias: bool = True 4017 ) -> exp.Subquery | None: 4018 if not this: 4019 return None 4020 4021 return self.expression( 4022 exp.Subquery( 4023 this=this, 4024 pivots=self._parse_pivots(), 4025 alias=self._parse_table_alias() if parse_alias else None, 4026 sample=self._parse_table_sample(), 4027 ) 4028 ) 4029 4030 def _implicit_unnests_to_explicit(self, this: E) -> E: 4031 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 4032 4033 refs = {_norm(this.args["from_"].this.copy(), dialect=self.dialect).alias_or_name} 4034 for i, join in enumerate(this.args.get("joins") or []): 4035 table = join.this 4036 normalized_table = table.copy() 4037 normalized_table.meta["maybe_column"] = True 4038 normalized_table = _norm(normalized_table, dialect=self.dialect) 4039 4040 if isinstance(table, exp.Table) and not join.args.get("on"): 4041 if normalized_table.parts[0].name in refs: 4042 table_as_column = table.to_column() 4043 unnest = exp.Unnest(expressions=[table_as_column]) 4044 4045 # Table.to_column creates a parent Alias node that we want to convert to 4046 # a TableAlias and attach to the Unnest, so it matches the parser's output 4047 if isinstance(table.args.get("alias"), exp.TableAlias): 4048 table_as_column.replace(table_as_column.this) 4049 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 4050 4051 table.replace(unnest) 4052 4053 refs.add(normalized_table.alias_or_name) 4054 4055 return this 4056 4057 @t.overload 4058 def _parse_query_modifiers(self, this: E) -> E: ... 4059 4060 @t.overload 4061 def _parse_query_modifiers(self, this: None) -> None: ... 4062 4063 def _parse_query_modifiers(self, this): 4064 if isinstance(this, self.MODIFIABLES): 4065 for join in self._parse_joins(): 4066 this.append("joins", join) 4067 for lateral in iter(self._parse_lateral, None): 4068 this.append("laterals", lateral) 4069 4070 while True: 4071 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 4072 modifier_token = self._curr 4073 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 4074 key, expression = parser(self) 4075 4076 if expression: 4077 if this.args.get(key): 4078 self.raise_error( 4079 f"Found multiple '{modifier_token.text.upper()}' clauses", 4080 token=modifier_token, 4081 ) 4082 4083 this.set(key, expression) 4084 if key == "limit": 4085 offset = expression.args.get("offset") 4086 expression.set("offset", None) 4087 4088 if offset: 4089 offset = exp.Offset(expression=offset) 4090 this.set("offset", offset) 4091 4092 limit_by_expressions = expression.expressions 4093 expression.set("expressions", None) 4094 offset.set("expressions", limit_by_expressions) 4095 continue 4096 break 4097 4098 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from_"): 4099 this = self._implicit_unnests_to_explicit(this) 4100 4101 return this 4102 4103 def _parse_hint_fallback_to_string(self) -> exp.Hint | None: 4104 start = self._curr 4105 while self._curr: 4106 self._advance() 4107 4108 end = self._tokens[self._index - 1] 4109 return exp.Hint(expressions=[self._find_sql(start, end)]) 4110 4111 def _parse_hint_function_call(self) -> exp.Expr | None: 4112 return self._parse_function_call() 4113 4114 def _parse_hint_body(self) -> exp.Hint | None: 4115 start_index = self._index 4116 should_fallback_to_string = False 4117 4118 hints = [] 4119 try: 4120 for hint in iter( 4121 lambda: self._parse_csv( 4122 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 4123 ), 4124 [], 4125 ): 4126 hints.extend(hint) 4127 except ParseError: 4128 should_fallback_to_string = True 4129 4130 if should_fallback_to_string or self._curr: 4131 self._retreat(start_index) 4132 return self._parse_hint_fallback_to_string() 4133 4134 return self.expression(exp.Hint(expressions=hints)) 4135 4136 def _parse_hint(self) -> exp.Hint | None: 4137 if self._match(TokenType.HINT) and self._prev_comments: 4138 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 4139 4140 return None 4141 4142 def _parse_into(self) -> exp.Into | None: 4143 if not self._match(TokenType.INTO): 4144 return None 4145 4146 temp = self._match(TokenType.TEMPORARY) 4147 unlogged = self._match_text_seq("UNLOGGED") 4148 self._match(TokenType.TABLE) 4149 4150 return self.expression( 4151 exp.Into(this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged) 4152 ) 4153 4154 def _parse_from( 4155 self, 4156 joins: bool = False, 4157 skip_from_token: bool = False, 4158 consume_pipe: bool = False, 4159 ) -> exp.From | None: 4160 if not skip_from_token and not self._match(TokenType.FROM): 4161 return None 4162 4163 comments = self._prev_comments 4164 return self.expression( 4165 exp.From(this=self._parse_table(joins=joins, consume_pipe=consume_pipe)), 4166 comments=comments, 4167 ) 4168 4169 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 4170 return self.expression( 4171 exp.MatchRecognizeMeasure( 4172 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 4173 this=self._parse_expression(), 4174 ) 4175 ) 4176 4177 def _parse_match_recognize(self) -> exp.MatchRecognize | None: 4178 if not self._match(TokenType.MATCH_RECOGNIZE): 4179 return None 4180 4181 self._match_l_paren() 4182 4183 partition = self._parse_partition_by() 4184 order = self._parse_order() 4185 4186 measures = ( 4187 self._parse_csv(self._parse_match_recognize_measure) 4188 if self._match_text_seq("MEASURES") 4189 else None 4190 ) 4191 4192 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 4193 rows = exp.var("ONE ROW PER MATCH") 4194 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 4195 text = "ALL ROWS PER MATCH" 4196 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 4197 text += " SHOW EMPTY MATCHES" 4198 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 4199 text += " OMIT EMPTY MATCHES" 4200 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 4201 text += " WITH UNMATCHED ROWS" 4202 rows = exp.var(text) 4203 else: 4204 rows = None 4205 4206 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 4207 text = "AFTER MATCH SKIP" 4208 if self._match_text_seq("PAST", "LAST", "ROW"): 4209 text += " PAST LAST ROW" 4210 elif self._match_text_seq("TO", "NEXT", "ROW"): 4211 text += " TO NEXT ROW" 4212 elif self._match_text_seq("TO", "FIRST"): 4213 text += f" TO FIRST {self._advance_any().text}" # type: ignore 4214 elif self._match_text_seq("TO", "LAST"): 4215 text += f" TO LAST {self._advance_any().text}" # type: ignore 4216 after = exp.var(text) 4217 else: 4218 after = None 4219 4220 if self._match_text_seq("PATTERN"): 4221 self._match_l_paren() 4222 4223 if not self._curr: 4224 self.raise_error("Expecting )", self._curr) 4225 4226 paren = 1 4227 start = self._curr 4228 4229 while self._curr and paren > 0: 4230 if self._curr.token_type == TokenType.L_PAREN: 4231 paren += 1 4232 if self._curr.token_type == TokenType.R_PAREN: 4233 paren -= 1 4234 4235 end = self._prev 4236 self._advance() 4237 4238 if paren > 0: 4239 self.raise_error("Expecting )", self._curr) 4240 4241 pattern = exp.var(self._find_sql(start, end)) 4242 else: 4243 pattern = None 4244 4245 define = ( 4246 self._parse_csv(self._parse_name_as_expression) 4247 if self._match_text_seq("DEFINE") 4248 else None 4249 ) 4250 4251 self._match_r_paren() 4252 4253 return self.expression( 4254 exp.MatchRecognize( 4255 partition_by=partition, 4256 order=order, 4257 measures=measures, 4258 rows=rows, 4259 after=after, 4260 pattern=pattern, 4261 define=define, 4262 alias=self._parse_table_alias(), 4263 ) 4264 ) 4265 4266 def _parse_lateral(self) -> exp.Lateral | None: 4267 cross_apply: bool | None = None 4268 if self._match_pair(TokenType.CROSS, TokenType.APPLY): 4269 cross_apply = True 4270 elif self._match_pair(TokenType.OUTER, TokenType.APPLY): 4271 cross_apply = False 4272 4273 if cross_apply is not None: 4274 this = self._parse_select(table=True) 4275 view = None 4276 outer = None 4277 elif self._match(TokenType.LATERAL): 4278 this = self._parse_select(table=True) 4279 view = self._match(TokenType.VIEW) 4280 outer = self._match(TokenType.OUTER) 4281 else: 4282 return None 4283 4284 if not this: 4285 this = ( 4286 self._parse_unnest() 4287 or self._parse_function() 4288 or self._parse_id_var(any_token=False) 4289 ) 4290 4291 while self._match(TokenType.DOT): 4292 this = exp.Dot( 4293 this=this, 4294 expression=self._parse_function() or self._parse_id_var(any_token=False), 4295 ) 4296 4297 ordinality: bool | None = None 4298 4299 if view: 4300 table = self._parse_id_var(any_token=False) 4301 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 4302 table_alias: exp.TableAlias | None = self.expression( 4303 exp.TableAlias(this=table, columns=columns) 4304 ) 4305 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 4306 # We move the alias from the lateral's child node to the lateral itself 4307 table_alias = this.args["alias"].pop() 4308 else: 4309 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4310 table_alias = self._parse_table_alias() 4311 4312 return self.expression( 4313 exp.Lateral( 4314 this=this, 4315 view=view, 4316 outer=outer, 4317 alias=table_alias, 4318 cross_apply=cross_apply, 4319 ordinality=ordinality, 4320 ) 4321 ) 4322 4323 def _parse_stream(self) -> exp.Stream | None: 4324 index = self._index 4325 if self._match(TokenType.STREAM): 4326 if this := self._try_parse(self._parse_table): 4327 return self.expression(exp.Stream(this=this)) 4328 self._retreat(index) 4329 return None 4330 4331 def _parse_join_parts( 4332 self, 4333 ) -> tuple[Token | None, Token | None, Token | None]: 4334 return ( 4335 self._prev if self._match_set(self.JOIN_METHODS) else None, 4336 self._prev if self._match_set(self.JOIN_SIDES) else None, 4337 self._prev if self._match_set(self.JOIN_KINDS) else None, 4338 ) 4339 4340 def _parse_using_identifiers(self) -> list[exp.Expr]: 4341 def _parse_column_as_identifier() -> exp.Expr | None: 4342 this = self._parse_column() 4343 if isinstance(this, exp.Column): 4344 return this.this 4345 return this 4346 4347 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 4348 4349 def _parse_join( 4350 self, skip_join_token: bool = False, parse_bracket: bool = False 4351 ) -> exp.Join | None: 4352 if self._match(TokenType.COMMA): 4353 table = self._try_parse(self._parse_table) 4354 cross_join = self.expression(exp.Join(this=table)) if table else None 4355 4356 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 4357 cross_join.set("kind", "CROSS") 4358 4359 return cross_join 4360 4361 index = self._index 4362 method, side, kind = self._parse_join_parts() 4363 directed = self._match_text_seq("DIRECTED") 4364 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 4365 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 4366 join_comments = self._prev_comments 4367 4368 if not skip_join_token and not join: 4369 self._retreat(index) 4370 kind = None 4371 method = None 4372 side = None 4373 4374 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 4375 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 4376 4377 if not skip_join_token and not join and not outer_apply and not cross_apply: 4378 return None 4379 4380 kwargs: dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 4381 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 4382 kwargs["expressions"] = self._parse_csv( 4383 lambda: self._parse_table(parse_bracket=parse_bracket) 4384 ) 4385 4386 if method: 4387 kwargs["method"] = method.text.upper() 4388 if side: 4389 kwargs["side"] = side.text.upper() 4390 if kind: 4391 kwargs["kind"] = kind.text.upper() 4392 if hint: 4393 kwargs["hint"] = hint 4394 4395 if self._match(TokenType.MATCH_CONDITION): 4396 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 4397 4398 if self._match(TokenType.ON): 4399 kwargs["on"] = self._parse_disjunction() 4400 elif self._match(TokenType.USING): 4401 kwargs["using"] = self._parse_using_identifiers() 4402 elif ( 4403 not method 4404 and not (outer_apply or cross_apply) 4405 and not isinstance(kwargs["this"], exp.Unnest) 4406 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 4407 ): 4408 index = self._index 4409 joins: list | None = list(self._parse_joins()) 4410 4411 if joins and self._match(TokenType.ON): 4412 kwargs["on"] = self._parse_disjunction() 4413 elif joins and self._match(TokenType.USING): 4414 kwargs["using"] = self._parse_using_identifiers() 4415 else: 4416 joins = None 4417 self._retreat(index) 4418 4419 kwargs["this"].set("joins", joins if joins else None) 4420 4421 kwargs["pivots"] = self._parse_pivots() 4422 4423 comments = [c for token in (method, side, kind) if token for c in token.comments] 4424 comments = (join_comments or []) + comments 4425 4426 if ( 4427 self.ADD_JOIN_ON_TRUE 4428 and not kwargs.get("on") 4429 and not kwargs.get("using") 4430 and not kwargs.get("method") 4431 and kwargs.get("kind") in (None, "INNER", "OUTER") 4432 ): 4433 kwargs["on"] = exp.true() 4434 4435 if directed: 4436 kwargs["directed"] = directed 4437 4438 return self.expression(exp.Join(**kwargs), comments=comments) 4439 4440 def _parse_opclass(self) -> exp.Expr | None: 4441 this = self._parse_disjunction() 4442 4443 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 4444 return this 4445 4446 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 4447 return self.expression(exp.Opclass(this=this, expression=self._parse_table_parts())) 4448 4449 return this 4450 4451 def _parse_index_params(self) -> exp.IndexParameters: 4452 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 4453 4454 if self._match(TokenType.L_PAREN, advance=False): 4455 columns = self._parse_wrapped_csv(self._parse_with_operator) 4456 else: 4457 columns = None 4458 4459 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 4460 partition_by = self._parse_partition_by() 4461 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 4462 tablespace = ( 4463 self._parse_var(any_token=True) 4464 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 4465 else None 4466 ) 4467 where = self._parse_where() 4468 4469 on = self._parse_field() if self._match(TokenType.ON) else None 4470 4471 return self.expression( 4472 exp.IndexParameters( 4473 using=using, 4474 columns=columns, 4475 include=include, 4476 partition_by=partition_by, 4477 where=where, 4478 with_storage=with_storage, 4479 tablespace=tablespace, 4480 on=on, 4481 ) 4482 ) 4483 4484 def _parse_index( 4485 self, index: exp.Expr | None = None, anonymous: bool = False 4486 ) -> exp.Index | None: 4487 if index or anonymous: 4488 unique = None 4489 primary = None 4490 amp = None 4491 4492 self._match(TokenType.ON) 4493 self._match(TokenType.TABLE) # hive 4494 table = self._parse_table_parts(schema=True) 4495 else: 4496 unique = self._match(TokenType.UNIQUE) 4497 primary = self._match_text_seq("PRIMARY") 4498 amp = self._match_text_seq("AMP") 4499 4500 if not self._match(TokenType.INDEX): 4501 return None 4502 4503 index = self._parse_id_var() 4504 table = None 4505 4506 params = self._parse_index_params() 4507 4508 return self.expression( 4509 exp.Index( 4510 this=index, table=table, unique=unique, primary=primary, amp=amp, params=params 4511 ) 4512 ) 4513 4514 def _parse_table_hints(self) -> list[exp.Expr] | None: 4515 hints: list[exp.Expr] = [] 4516 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 4517 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 4518 hints.append( 4519 self.expression( 4520 exp.WithTableHint( 4521 expressions=self._parse_csv( 4522 lambda: self._parse_function() or self._parse_var(any_token=True) 4523 ) 4524 ) 4525 ) 4526 ) 4527 self._match_r_paren() 4528 else: 4529 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 4530 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 4531 hint = exp.IndexTableHint(this=self._prev.text.upper()) 4532 4533 self._match_set((TokenType.INDEX, TokenType.KEY)) 4534 if self._match(TokenType.FOR): 4535 hint.set("target", self._advance_any() and self._prev.text.upper()) 4536 4537 hint.set("expressions", self._parse_wrapped_id_vars()) 4538 hints.append(hint) 4539 4540 return hints or None 4541 4542 def _parse_table_part(self, schema: bool = False) -> exp.Expr | None: 4543 return ( 4544 (not schema and self._parse_function(optional_parens=False)) 4545 or self._parse_id_var(any_token=False) 4546 or self._parse_string_as_identifier() 4547 or self._parse_placeholder() 4548 ) 4549 4550 def _parse_table_parts_fast(self) -> exp.Table | None: 4551 index = self._index 4552 parts: list[exp.Identifier] | None = None 4553 all_comments: list[str] | None = None 4554 4555 while self._match_set(self.IDENTIFIER_TOKENS): 4556 token = self._prev 4557 comments = self._prev_comments 4558 4559 has_dot = self._match(TokenType.DOT) 4560 curr_tt = self._curr.token_type 4561 4562 if not has_dot: 4563 if curr_tt in self.TABLE_POSTFIX_TOKENS: 4564 self._retreat(index) 4565 return None 4566 elif curr_tt not in self.IDENTIFIER_TOKENS: 4567 self._retreat(index) 4568 return None 4569 4570 if parts is None: 4571 parts = [] 4572 4573 if comments: 4574 if all_comments is None: 4575 all_comments = [] 4576 all_comments.extend(comments) 4577 self._prev_comments = [] 4578 4579 parts.append( 4580 self.expression( 4581 exp.Identifier( 4582 this=token.text, quoted=token.token_type == TokenType.IDENTIFIER 4583 ), 4584 token, 4585 ) 4586 ) 4587 4588 if not has_dot: 4589 break 4590 4591 if parts is None: 4592 return None 4593 4594 n = len(parts) 4595 4596 if n == 1: 4597 table: exp.Table = exp.Table(this=parts[0]) 4598 elif n == 2: 4599 table = exp.Table(this=parts[1], db=parts[0]) 4600 elif n >= 3: 4601 this: exp.Identifier | exp.Dot = parts[2] 4602 for i in range(3, n): 4603 this = exp.Dot(this=this, expression=parts[i]) 4604 4605 table = exp.Table(this=this, db=parts[1], catalog=parts[0]) 4606 4607 if table is None: 4608 self._retreat(index) 4609 elif all_comments: 4610 table.add_comments(all_comments) 4611 return table 4612 4613 def _parse_table_parts( 4614 self, 4615 schema: bool = False, 4616 is_db_reference: bool = False, 4617 wildcard: bool = False, 4618 fast: bool = False, 4619 ) -> exp.Table | exp.Dot | None: 4620 if fast: 4621 return self._parse_table_parts_fast() 4622 4623 catalog: exp.Expr | str | None = None 4624 db: exp.Expr | str | None = None 4625 table: exp.Expr | str | None = self._parse_table_part(schema=schema) 4626 4627 while self._match(TokenType.DOT): 4628 if catalog: 4629 # This allows nesting the table in arbitrarily many dot expressions if needed 4630 table = self.expression( 4631 exp.Dot(this=table, expression=self._parse_table_part(schema=schema)) 4632 ) 4633 else: 4634 catalog = db 4635 db = table 4636 # "" used for tsql FROM a..b case 4637 table = self._parse_table_part(schema=schema) or "" 4638 4639 if ( 4640 wildcard 4641 and self._is_connected() 4642 and (isinstance(table, exp.Identifier) or not table) 4643 and self._match(TokenType.STAR) 4644 ): 4645 if isinstance(table, exp.Identifier): 4646 table.args["this"] += "*" 4647 else: 4648 table = exp.Identifier(this="*") 4649 4650 if is_db_reference: 4651 catalog = db 4652 db = table 4653 table = None 4654 4655 if not table and not is_db_reference: 4656 self.raise_error(f"Expected table name but got {self._curr}") 4657 if not db and is_db_reference: 4658 self.raise_error(f"Expected database name but got {self._curr}") 4659 4660 table = self.expression(exp.Table(this=table, db=db, catalog=catalog)) 4661 4662 # Bubble up comments from identifier parts to the Table 4663 comments = [] 4664 for part in table.parts: 4665 if part_comments := part.pop_comments(): 4666 comments.extend(part_comments) 4667 if comments: 4668 table.add_comments(comments) 4669 4670 changes = self._parse_changes() 4671 if changes: 4672 table.set("changes", changes) 4673 4674 at_before = self._parse_historical_data() 4675 if at_before: 4676 table.set("when", at_before) 4677 4678 pivots = self._parse_pivots() 4679 if pivots: 4680 table.set("pivots", pivots) 4681 4682 return table 4683 4684 def _parse_table( 4685 self, 4686 schema: bool = False, 4687 joins: bool = False, 4688 alias_tokens: t.Collection[TokenType] | None = None, 4689 parse_bracket: bool = False, 4690 is_db_reference: bool = False, 4691 parse_partition: bool = False, 4692 consume_pipe: bool = False, 4693 ) -> exp.Expr | None: 4694 if not schema and not is_db_reference and not consume_pipe and not joins: 4695 index = self._index 4696 table = self._parse_table_parts(fast=True) 4697 4698 if table is not None: 4699 curr_tt = self._curr.token_type 4700 next_tt = self._next.token_type 4701 4702 fast_terminators = self.TABLE_TERMINATORS 4703 4704 # only return the table if we're sure there are no other operators 4705 # MATCH_CONDITION is a special case because it accepts any alias before it like LIMIT 4706 if curr_tt in fast_terminators and next_tt != TokenType.MATCH_CONDITION: 4707 return table 4708 4709 postfix_tokens = self.TABLE_POSTFIX_TOKENS 4710 4711 if curr_tt not in postfix_tokens and next_tt not in postfix_tokens: 4712 if alias := self._parse_table_alias( 4713 alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 4714 ): 4715 table.set("alias", alias) 4716 4717 if self._curr.token_type in fast_terminators: 4718 return table 4719 4720 self._retreat(index) 4721 4722 if stream := self._parse_stream(): 4723 return stream 4724 4725 if lateral := self._parse_lateral(): 4726 return lateral 4727 4728 if unnest := self._parse_unnest(): 4729 return unnest 4730 4731 if values := self._parse_derived_table_values(): 4732 return values 4733 4734 if subquery := self._parse_select(table=True, consume_pipe=consume_pipe): 4735 if not subquery.args.get("pivots"): 4736 subquery.set("pivots", self._parse_pivots()) 4737 return subquery 4738 4739 bracket = parse_bracket and self._parse_bracket(None) 4740 bracket = self.expression(exp.Table(this=bracket)) if bracket else None 4741 4742 rows_from_tables = ( 4743 self._parse_wrapped_csv(self._parse_table) 4744 if self._match_text_seq("ROWS", "FROM") 4745 else None 4746 ) 4747 rows_from = ( 4748 self.expression(exp.Table(rows_from=rows_from_tables)) if rows_from_tables else None 4749 ) 4750 4751 only = self._match(TokenType.ONLY) 4752 4753 this = t.cast( 4754 exp.Expr, 4755 bracket 4756 or rows_from 4757 or self._parse_bracket( 4758 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4759 ), 4760 ) 4761 4762 if only: 4763 this.set("only", only) 4764 4765 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4766 self._match(TokenType.STAR) 4767 4768 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4769 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4770 this.set("partition", self._parse_partition()) 4771 4772 if schema: 4773 return self._parse_schema(this=this) 4774 4775 if self.dialect.ALIAS_POST_VERSION: 4776 this.set("version", self._parse_version()) 4777 4778 if self.dialect.ALIAS_POST_TABLESAMPLE: 4779 this.set("sample", self._parse_table_sample()) 4780 4781 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4782 if alias: 4783 this.set("alias", alias) 4784 4785 if self._match(TokenType.INDEXED_BY): 4786 this.set("indexed", self._parse_table_parts()) 4787 elif self._match_text_seq("NOT", "INDEXED"): 4788 this.set("indexed", False) 4789 4790 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4791 return self.expression( 4792 exp.AtIndex(this=this.to_column(copy=False), expression=self._parse_id_var()) 4793 ) 4794 4795 this.set("hints", self._parse_table_hints()) 4796 4797 if not this.args.get("pivots"): 4798 this.set("pivots", self._parse_pivots()) 4799 4800 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4801 this.set("sample", self._parse_table_sample()) 4802 4803 if not self.dialect.ALIAS_POST_VERSION: 4804 this.set("version", self._parse_version()) 4805 4806 if joins: 4807 for join in self._parse_joins(): 4808 this.append("joins", join) 4809 4810 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4811 this.set("ordinality", True) 4812 this.set("alias", self._parse_table_alias()) 4813 4814 return this 4815 4816 def _parse_version(self) -> exp.Version | None: 4817 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4818 this = "TIMESTAMP" 4819 elif self._match(TokenType.VERSION_SNAPSHOT): 4820 this = "VERSION" 4821 else: 4822 return None 4823 4824 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4825 kind = self._prev.text.upper() 4826 start = self._parse_bitwise() 4827 self._match_texts(("TO", "AND")) 4828 end = self._parse_bitwise() 4829 expression: exp.Expr | None = self.expression(exp.Tuple(expressions=[start, end])) 4830 elif self._match_text_seq("CONTAINED", "IN"): 4831 kind = "CONTAINED IN" 4832 expression = self.expression( 4833 exp.Tuple(expressions=self._parse_wrapped_csv(self._parse_bitwise)) 4834 ) 4835 elif self._match(TokenType.ALL): 4836 kind = "ALL" 4837 expression = None 4838 else: 4839 self._match_text_seq("AS", "OF") 4840 kind = "AS OF" 4841 expression = self._parse_type() 4842 4843 return self.expression(exp.Version(this=this, expression=expression, kind=kind)) 4844 4845 def _parse_historical_data(self) -> exp.HistoricalData | None: 4846 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4847 index = self._index 4848 historical_data = None 4849 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4850 this = self._prev.text.upper() 4851 kind = ( 4852 self._match(TokenType.L_PAREN) 4853 and self._match_texts(self.HISTORICAL_DATA_KIND) 4854 and self._prev.text.upper() 4855 ) 4856 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4857 4858 if expression: 4859 self._match_r_paren() 4860 historical_data = self.expression( 4861 exp.HistoricalData(this=this, kind=kind, expression=expression) 4862 ) 4863 else: 4864 self._retreat(index) 4865 4866 return historical_data 4867 4868 def _parse_changes(self) -> exp.Changes | None: 4869 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4870 return None 4871 4872 information = self._parse_var(any_token=True) 4873 self._match_r_paren() 4874 4875 return self.expression( 4876 exp.Changes( 4877 information=information, 4878 at_before=self._parse_historical_data(), 4879 end=self._parse_historical_data(), 4880 ) 4881 ) 4882 4883 def _parse_unnest(self, with_alias: bool = True) -> exp.Unnest | None: 4884 if not self._match_pair(TokenType.UNNEST, TokenType.L_PAREN, advance=False): 4885 return None 4886 4887 self._advance() 4888 4889 expressions = self._parse_wrapped_csv(self._parse_equality) 4890 offset: bool | exp.Expr = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4891 4892 alias = self._parse_table_alias() if with_alias else None 4893 4894 if alias: 4895 if self.dialect.UNNEST_COLUMN_ONLY: 4896 if alias.args.get("columns"): 4897 self.raise_error("Unexpected extra column alias in unnest.") 4898 4899 alias.set("columns", [alias.this]) 4900 alias.set("this", None) 4901 4902 columns = alias.args.get("columns") or [] 4903 if offset and len(expressions) < len(columns): 4904 offset = columns.pop() 4905 4906 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4907 self._match(TokenType.ALIAS) 4908 offset = self._parse_id_var( 4909 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4910 ) or exp.to_identifier("offset") 4911 4912 return self.expression(exp.Unnest(expressions=expressions, alias=alias, offset=offset)) 4913 4914 def _parse_derived_table_values(self) -> exp.Values | None: 4915 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4916 if not is_derived and not ( 4917 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4918 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4919 ): 4920 return None 4921 4922 expressions = self._parse_csv(self._parse_value) 4923 alias = self._parse_table_alias() 4924 4925 if is_derived: 4926 self._match_r_paren() 4927 4928 return self.expression( 4929 exp.Values(expressions=expressions, alias=alias or self._parse_table_alias()) 4930 ) 4931 4932 def _parse_table_sample(self, as_modifier: bool = False) -> exp.TableSample | None: 4933 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4934 as_modifier and self._match_text_seq("USING", "SAMPLE") 4935 ): 4936 return None 4937 4938 bucket_numerator = None 4939 bucket_denominator = None 4940 bucket_field = None 4941 percent = None 4942 size = None 4943 seed = None 4944 4945 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4946 matched_l_paren = self._match(TokenType.L_PAREN) 4947 4948 if self.TABLESAMPLE_CSV: 4949 num = None 4950 expressions = self._parse_csv(self._parse_primary) 4951 else: 4952 expressions = None 4953 num = ( 4954 self._parse_factor() 4955 if self._match(TokenType.NUMBER, advance=False) 4956 else self._parse_primary() or self._parse_placeholder() 4957 ) 4958 4959 if self._match_text_seq("BUCKET"): 4960 bucket_numerator = self._parse_number() 4961 self._match_text_seq("OUT", "OF") 4962 bucket_denominator = bucket_denominator = self._parse_number() 4963 self._match(TokenType.ON) 4964 bucket_field = self._parse_field() 4965 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4966 percent = num 4967 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4968 size = num 4969 else: 4970 percent = num 4971 4972 if matched_l_paren: 4973 self._match_r_paren() 4974 4975 if self._match(TokenType.L_PAREN): 4976 method = self._parse_var(upper=True) 4977 seed = self._match(TokenType.COMMA) and self._parse_number() 4978 self._match_r_paren() 4979 elif self._match_texts(("SEED", "REPEATABLE")): 4980 seed = self._parse_wrapped(self._parse_number) 4981 4982 if not method and self.DEFAULT_SAMPLING_METHOD: 4983 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4984 4985 return self.expression( 4986 exp.TableSample( 4987 expressions=expressions, 4988 method=method, 4989 bucket_numerator=bucket_numerator, 4990 bucket_denominator=bucket_denominator, 4991 bucket_field=bucket_field, 4992 percent=percent, 4993 size=size, 4994 seed=seed, 4995 ) 4996 ) 4997 4998 def _parse_pivots(self) -> list[exp.Pivot] | None: 4999 return list(iter(self._parse_pivot, None)) or None 5000 5001 def _parse_joins(self) -> t.Iterator[exp.Join]: 5002 return iter(self._parse_join, None) 5003 5004 def _parse_unpivot_columns(self) -> exp.UnpivotColumns | None: 5005 if not self._match(TokenType.INTO): 5006 return None 5007 5008 return self.expression( 5009 exp.UnpivotColumns( 5010 this=self._match_text_seq("NAME") and self._parse_column(), 5011 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 5012 ) 5013 ) 5014 5015 # https://duckdb.org/docs/sql/statements/pivot 5016 def _parse_simplified_pivot(self, is_unpivot: bool | None = None) -> exp.Pivot: 5017 def _parse_on() -> exp.Expr | None: 5018 this = self._parse_bitwise() 5019 5020 if self._match(TokenType.IN): 5021 # PIVOT ... ON col IN (row_val1, row_val2) 5022 return self._parse_in(this) 5023 if self._match(TokenType.ALIAS, advance=False): 5024 # UNPIVOT ... ON (col1, col2, col3) AS row_val 5025 return self._parse_alias(this) 5026 5027 return this 5028 5029 this = self._parse_table() 5030 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 5031 into = self._parse_unpivot_columns() 5032 using = self._match(TokenType.USING) and self._parse_csv( 5033 lambda: self._parse_alias(self._parse_column()) 5034 ) 5035 group = self._parse_group() 5036 5037 return self.expression( 5038 exp.Pivot( 5039 this=this, 5040 expressions=expressions, 5041 using=using, 5042 group=group, 5043 unpivot=is_unpivot, 5044 into=into, 5045 ) 5046 ) 5047 5048 def _parse_pivot_in(self) -> exp.In: 5049 def _parse_aliased_expression() -> exp.Expr | None: 5050 this = self._parse_select_or_expression() 5051 5052 self._match(TokenType.ALIAS) 5053 alias = self._parse_bitwise() 5054 if alias: 5055 if isinstance(alias, exp.Column) and not alias.db: 5056 alias = alias.this 5057 return self.expression(exp.PivotAlias(this=this, alias=alias)) 5058 5059 return this 5060 5061 value = self._parse_column() 5062 5063 if not self._match(TokenType.IN): 5064 self.raise_error("Expecting IN") 5065 5066 if self._match(TokenType.L_PAREN): 5067 if self._match(TokenType.ANY): 5068 exprs: list[exp.Expr] = ensure_list(exp.PivotAny(this=self._parse_order())) 5069 else: 5070 exprs = self._parse_csv(_parse_aliased_expression) 5071 self._match_r_paren() 5072 return self.expression(exp.In(this=value, expressions=exprs)) 5073 5074 return self.expression(exp.In(this=value, field=self._parse_id_var())) 5075 5076 def _parse_pivot_aggregation(self) -> exp.Expr | None: 5077 func = self._parse_function() 5078 if not func: 5079 if self._prev.token_type == TokenType.COMMA: 5080 return None 5081 self.raise_error("Expecting an aggregation function in PIVOT") 5082 5083 return self._parse_alias(func) 5084 5085 def _parse_pivot(self) -> exp.Pivot | None: 5086 index = self._index 5087 include_nulls = None 5088 5089 if self._match(TokenType.PIVOT): 5090 unpivot = False 5091 elif self._match(TokenType.UNPIVOT): 5092 unpivot = True 5093 5094 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 5095 if self._match_text_seq("INCLUDE", "NULLS"): 5096 include_nulls = True 5097 elif self._match_text_seq("EXCLUDE", "NULLS"): 5098 include_nulls = False 5099 else: 5100 return None 5101 5102 expressions = [] 5103 5104 if not self._match(TokenType.L_PAREN): 5105 self._retreat(index) 5106 return None 5107 5108 if unpivot: 5109 expressions = self._parse_csv(self._parse_column) 5110 else: 5111 expressions = self._parse_csv(self._parse_pivot_aggregation) 5112 5113 if not expressions: 5114 self.raise_error("Failed to parse PIVOT's aggregation list") 5115 5116 if not self._match(TokenType.FOR): 5117 self.raise_error("Expecting FOR") 5118 5119 fields = [] 5120 while True: 5121 field = self._try_parse(self._parse_pivot_in) 5122 if not field: 5123 break 5124 fields.append(field) 5125 5126 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 5127 self._parse_bitwise 5128 ) 5129 5130 group = self._parse_group() 5131 5132 self._match_r_paren() 5133 5134 pivot = self.expression( 5135 exp.Pivot( 5136 expressions=expressions, 5137 fields=fields, 5138 unpivot=unpivot, 5139 include_nulls=include_nulls, 5140 default_on_null=default_on_null, 5141 group=group, 5142 ) 5143 ) 5144 5145 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 5146 pivot.set("alias", self._parse_table_alias()) 5147 5148 if not unpivot: 5149 names = self._pivot_column_names(t.cast(list[exp.Expr], expressions)) 5150 5151 columns: list[exp.Expr] = [] 5152 all_fields = [] 5153 for pivot_field in pivot.fields: 5154 pivot_field_expressions = pivot_field.expressions 5155 5156 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 5157 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 5158 continue 5159 5160 all_fields.append( 5161 [ 5162 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 5163 for fld in pivot_field_expressions 5164 ] 5165 ) 5166 5167 if all_fields: 5168 if names: 5169 all_fields.append(names) 5170 5171 # Generate all possible combinations of the pivot columns 5172 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 5173 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 5174 for fld_parts_tuple in itertools.product(*all_fields): 5175 fld_parts = list(fld_parts_tuple) 5176 5177 if names and self.PREFIXED_PIVOT_COLUMNS: 5178 # Move the "name" to the front of the list 5179 fld_parts.insert(0, fld_parts.pop(-1)) 5180 5181 columns.append(exp.to_identifier("_".join(fld_parts))) 5182 5183 pivot.set("columns", columns) 5184 5185 return pivot 5186 5187 def _pivot_column_names(self, aggregations: list[exp.Expr]) -> list[str]: 5188 return [agg.alias for agg in aggregations if agg.alias] 5189 5190 def _parse_prewhere(self, skip_where_token: bool = False) -> exp.PreWhere | None: 5191 if not skip_where_token and not self._match(TokenType.PREWHERE): 5192 return None 5193 5194 comments = self._prev_comments 5195 return self.expression( 5196 exp.PreWhere(this=self._parse_disjunction()), 5197 comments=comments, 5198 ) 5199 5200 def _parse_where(self, skip_where_token: bool = False) -> exp.Where | None: 5201 if not skip_where_token and not self._match(TokenType.WHERE): 5202 return None 5203 5204 comments = self._prev_comments 5205 return self.expression( 5206 exp.Where(this=self._parse_disjunction()), 5207 comments=comments, 5208 ) 5209 5210 def _parse_group(self, skip_group_by_token: bool = False) -> exp.Group | None: 5211 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 5212 return None 5213 comments = self._prev_comments 5214 5215 elements: dict[str, t.Any] = defaultdict(list) 5216 5217 if self._match(TokenType.ALL): 5218 elements["all"] = True 5219 elif self._match(TokenType.DISTINCT): 5220 elements["all"] = False 5221 5222 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 5223 return self.expression(exp.Group(**elements), comments=comments) # type: ignore 5224 5225 while True: 5226 index = self._index 5227 5228 elements["expressions"].extend( 5229 self._parse_csv( 5230 lambda: ( 5231 None 5232 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 5233 else self._parse_disjunction() 5234 ) 5235 ) 5236 ) 5237 5238 before_with_index = self._index 5239 with_prefix = self._match(TokenType.WITH) 5240 5241 if cube_or_rollup := self._parse_cube_or_rollup(with_prefix=with_prefix): 5242 key = "rollup" if isinstance(cube_or_rollup, exp.Rollup) else "cube" 5243 elements[key].append(cube_or_rollup) 5244 elif grouping_sets := self._parse_grouping_sets(): 5245 elements["grouping_sets"].append(grouping_sets) 5246 elif self._match_text_seq("TOTALS"): 5247 elements["totals"] = True # type: ignore 5248 5249 if before_with_index <= self._index <= before_with_index + 1: 5250 self._retreat(before_with_index) 5251 break 5252 5253 if index == self._index: 5254 break 5255 5256 return self.expression(exp.Group(**elements), comments=comments) # type: ignore 5257 5258 def _parse_cube_or_rollup(self, with_prefix: bool = False) -> exp.Cube | exp.Rollup | None: 5259 if self._match(TokenType.CUBE): 5260 kind: type[exp.Cube | exp.Rollup] = exp.Cube 5261 elif self._match(TokenType.ROLLUP): 5262 kind = exp.Rollup 5263 else: 5264 return None 5265 5266 return self.expression( 5267 kind(expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_bitwise)) 5268 ) 5269 5270 def _parse_grouping_sets(self) -> exp.GroupingSets | None: 5271 if self._match(TokenType.GROUPING_SETS): 5272 return self.expression( 5273 exp.GroupingSets(expressions=self._parse_wrapped_csv(self._parse_grouping_set)) 5274 ) 5275 return None 5276 5277 def _parse_grouping_set(self) -> exp.Expr | None: 5278 return self._parse_grouping_sets() or self._parse_cube_or_rollup() or self._parse_bitwise() 5279 5280 def _parse_having(self, skip_having_token: bool = False) -> exp.Having | None: 5281 if not skip_having_token and not self._match(TokenType.HAVING): 5282 return None 5283 comments = self._prev_comments 5284 return self.expression( 5285 exp.Having(this=self._parse_disjunction()), 5286 comments=comments, 5287 ) 5288 5289 def _parse_qualify(self) -> exp.Qualify | None: 5290 if not self._match(TokenType.QUALIFY): 5291 return None 5292 return self.expression(exp.Qualify(this=self._parse_disjunction())) 5293 5294 def _parse_connect_with_prior(self) -> exp.Expr | None: 5295 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 5296 exp.Prior(this=self._parse_bitwise()) 5297 ) 5298 connect = self._parse_disjunction() 5299 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 5300 return connect 5301 5302 def _parse_connect(self, skip_start_token: bool = False) -> exp.Connect | None: 5303 if skip_start_token: 5304 start = None 5305 elif self._match(TokenType.START_WITH): 5306 start = self._parse_disjunction() 5307 else: 5308 return None 5309 5310 self._match(TokenType.CONNECT_BY) 5311 nocycle = self._match_text_seq("NOCYCLE") 5312 connect = self._parse_connect_with_prior() 5313 5314 if not start and self._match(TokenType.START_WITH): 5315 start = self._parse_disjunction() 5316 5317 return self.expression(exp.Connect(start=start, connect=connect, nocycle=nocycle)) 5318 5319 def _parse_name_as_expression(self) -> exp.Expr | None: 5320 this = self._parse_id_var(any_token=True) 5321 if self._match(TokenType.ALIAS): 5322 this = self.expression(exp.Alias(alias=this, this=self._parse_disjunction())) 5323 return this 5324 5325 def _parse_interpolate(self) -> list[exp.Expr] | None: 5326 if self._match_text_seq("INTERPOLATE"): 5327 return self._parse_wrapped_csv(self._parse_name_as_expression) 5328 return None 5329 5330 def _parse_order( 5331 self, this: exp.Expr | None = None, skip_order_token: bool = False 5332 ) -> exp.Expr | None: 5333 siblings = None 5334 if not skip_order_token and not self._match(TokenType.ORDER_BY): 5335 if not self._match(TokenType.ORDER_SIBLINGS_BY): 5336 return this 5337 5338 siblings = True 5339 5340 comments = self._prev_comments 5341 return self.expression( 5342 exp.Order( 5343 this=this, 5344 expressions=self._parse_csv(self._parse_ordered), 5345 siblings=siblings, 5346 ), 5347 comments=comments, 5348 ) 5349 5350 def _parse_sort(self, exp_class: type[E], token: TokenType) -> E | None: 5351 if not self._match(token): 5352 return None 5353 return self.expression(exp_class(expressions=self._parse_csv(self._parse_ordered))) 5354 5355 def _parse_ordered( 5356 self, parse_method: t.Callable[[], exp.Expr | None] | None = None 5357 ) -> exp.Ordered | None: 5358 this = parse_method() if parse_method else self._parse_disjunction() 5359 if not this: 5360 return None 5361 5362 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 5363 this = exp.var("ALL") 5364 5365 asc = self._match(TokenType.ASC) 5366 desc: bool | None = True if self._match(TokenType.DESC) else (False if asc else None) 5367 5368 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 5369 is_nulls_last = self._match_text_seq("NULLS", "LAST") 5370 5371 nulls_first = is_nulls_first or False 5372 explicitly_null_ordered = is_nulls_first or is_nulls_last 5373 5374 if ( 5375 not explicitly_null_ordered 5376 and ( 5377 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 5378 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 5379 ) 5380 and self.dialect.NULL_ORDERING != "nulls_are_last" 5381 ): 5382 nulls_first = True 5383 5384 if self._match_text_seq("WITH", "FILL"): 5385 with_fill = self.expression( 5386 exp.WithFill( 5387 from_=self._match(TokenType.FROM) and self._parse_bitwise(), 5388 to=self._match_text_seq("TO") and self._parse_bitwise(), 5389 step=self._match_text_seq("STEP") and self._parse_bitwise(), 5390 interpolate=self._parse_interpolate(), 5391 ) 5392 ) 5393 else: 5394 with_fill = None 5395 5396 return self.expression( 5397 exp.Ordered(this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill) 5398 ) 5399 5400 def _parse_limit_options(self) -> exp.LimitOptions | None: 5401 percent = self._match_set((TokenType.PERCENT, TokenType.MOD)) 5402 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 5403 self._match_text_seq("ONLY") 5404 with_ties = self._match_text_seq("WITH", "TIES") 5405 5406 if not (percent or rows or with_ties): 5407 return None 5408 5409 return self.expression(exp.LimitOptions(percent=percent, rows=rows, with_ties=with_ties)) 5410 5411 def _parse_limit( 5412 self, 5413 this: exp.Expr | None = None, 5414 top: bool = False, 5415 skip_limit_token: bool = False, 5416 ) -> exp.Expr | None: 5417 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 5418 comments = self._prev_comments 5419 if top: 5420 limit_paren = self._match(TokenType.L_PAREN) 5421 expression = ( 5422 self._parse_term() or self._parse_select() 5423 if limit_paren 5424 else self._parse_number() 5425 ) 5426 5427 if limit_paren: 5428 self._match_r_paren() 5429 5430 else: 5431 # Parsing LIMIT x% (i.e x PERCENT) as a term leads to an error, since 5432 # we try to build an exp.Mod expr. For that matter, we backtrack and instead 5433 # consume the factor plus parse the percentage separately 5434 index = self._index 5435 expression = self._try_parse(self._parse_term) 5436 if isinstance(expression, exp.Mod): 5437 self._retreat(index) 5438 expression = self._parse_factor() 5439 elif not expression: 5440 expression = self._parse_factor() 5441 limit_options = self._parse_limit_options() 5442 5443 if self._match(TokenType.COMMA): 5444 offset = expression 5445 expression = self._parse_term() 5446 else: 5447 offset = None 5448 5449 limit_exp = self.expression( 5450 exp.Limit( 5451 this=this, 5452 expression=expression, 5453 offset=offset, 5454 limit_options=limit_options, 5455 expressions=self._parse_limit_by(), 5456 ), 5457 comments=comments, 5458 ) 5459 5460 return limit_exp 5461 5462 if self._match(TokenType.FETCH): 5463 direction = ( 5464 self._prev.text.upper() 5465 if self._match_set((TokenType.FIRST, TokenType.NEXT)) 5466 else "FIRST" 5467 ) 5468 5469 count = self._parse_field(tokens=self.FETCH_TOKENS) 5470 5471 return self.expression( 5472 exp.Fetch( 5473 direction=direction, count=count, limit_options=self._parse_limit_options() 5474 ) 5475 ) 5476 5477 return this 5478 5479 def _parse_offset(self, this: exp.Expr | None = None) -> exp.Expr | None: 5480 if not self._match(TokenType.OFFSET): 5481 return this 5482 5483 count = self._parse_term() 5484 self._match_set((TokenType.ROW, TokenType.ROWS)) 5485 5486 return self.expression( 5487 exp.Offset(this=this, expression=count, expressions=self._parse_limit_by()) 5488 ) 5489 5490 def _can_parse_limit_or_offset(self) -> bool: 5491 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 5492 return False 5493 5494 index = self._index 5495 result = bool( 5496 self._try_parse(self._parse_limit, retreat=True) 5497 or self._try_parse(self._parse_offset, retreat=True) 5498 ) 5499 self._retreat(index) 5500 5501 # MATCH_CONDITION (...) is a special construct that should not be consumed by limit/offset 5502 if self._next.token_type == TokenType.MATCH_CONDITION: 5503 result = False 5504 5505 return result 5506 5507 def _parse_limit_by(self) -> list[exp.Expr] | None: 5508 return self._parse_csv(self._parse_bitwise) if self._match_text_seq("BY") else None 5509 5510 def _parse_locks(self) -> list[exp.Lock]: 5511 locks = [] 5512 while True: 5513 update, key = None, None 5514 if self._match_text_seq("FOR", "UPDATE"): 5515 update = True 5516 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 5517 "LOCK", "IN", "SHARE", "MODE" 5518 ): 5519 update = False 5520 elif self._match_text_seq("FOR", "KEY", "SHARE"): 5521 update, key = False, True 5522 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 5523 update, key = True, True 5524 else: 5525 break 5526 5527 expressions = None 5528 if self._match_text_seq("OF"): 5529 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 5530 5531 wait: bool | exp.Expr | None = None 5532 if self._match_text_seq("NOWAIT"): 5533 wait = True 5534 elif self._match_text_seq("WAIT"): 5535 wait = self._parse_primary() 5536 elif self._match_text_seq("SKIP", "LOCKED"): 5537 wait = False 5538 5539 locks.append( 5540 self.expression( 5541 exp.Lock(update=update, expressions=expressions, wait=wait, key=key) 5542 ) 5543 ) 5544 5545 return locks 5546 5547 def parse_set_operation( 5548 self, this: exp.Expr | None, consume_pipe: bool = False 5549 ) -> exp.Expr | None: 5550 start = self._index 5551 _, side_token, kind_token = self._parse_join_parts() 5552 5553 side = side_token.text if side_token else None 5554 kind = kind_token.text if kind_token else None 5555 5556 if not self._match_set(self.SET_OPERATIONS): 5557 self._retreat(start) 5558 return None 5559 5560 token_type = self._prev.token_type 5561 5562 if token_type == TokenType.UNION: 5563 operation: type[exp.SetOperation] = exp.Union 5564 elif token_type == TokenType.EXCEPT: 5565 operation = exp.Except 5566 else: 5567 operation = exp.Intersect 5568 5569 comments = self._prev.comments 5570 5571 if self._match(TokenType.DISTINCT): 5572 distinct: bool | None = True 5573 elif self._match(TokenType.ALL): 5574 distinct = False 5575 else: 5576 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 5577 if distinct is None: 5578 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 5579 5580 by_name = ( 5581 self._match_text_seq("BY", "NAME") 5582 or self._match_text_seq("STRICT", "CORRESPONDING") 5583 or None 5584 ) 5585 if self._match_text_seq("CORRESPONDING"): 5586 by_name = True 5587 if not side and not kind: 5588 kind = "INNER" 5589 5590 on_column_list = None 5591 if by_name and self._match_texts(("ON", "BY")): 5592 on_column_list = self._parse_wrapped_csv(self._parse_column) 5593 5594 expression = self._parse_select( 5595 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 5596 ) 5597 5598 return self.expression( 5599 operation( 5600 this=this, 5601 distinct=distinct, 5602 by_name=by_name, 5603 expression=expression, 5604 side=side, 5605 kind=kind, 5606 on=on_column_list, 5607 ), 5608 comments=comments, 5609 ) 5610 5611 def _parse_set_operations(self, this: exp.Expr | None) -> exp.Expr | None: 5612 while this: 5613 setop = self.parse_set_operation(this) 5614 if not setop: 5615 break 5616 this = setop 5617 5618 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 5619 expression = this.expression 5620 5621 if expression: 5622 for arg in self.SET_OP_MODIFIERS: 5623 expr = expression.args.get(arg) 5624 if expr: 5625 this.set(arg, expr.pop()) 5626 5627 return this 5628 5629 def _parse_expression(self) -> exp.Expr | None: 5630 return self._parse_alias(self._parse_assignment()) 5631 5632 def _parse_assignment(self) -> exp.Expr | None: 5633 this = self._parse_disjunction() 5634 if not this and self._next.token_type in self.ASSIGNMENT: 5635 # This allows us to parse <non-identifier token> := <expr> 5636 this = exp.column( 5637 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 5638 ) 5639 5640 while self._match_set(self.ASSIGNMENT): 5641 if isinstance(this, exp.Column) and len(this.parts) == 1: 5642 this = this.this 5643 5644 comments = self._prev_comments 5645 this = self.expression( 5646 self.ASSIGNMENT[self._prev.token_type]( 5647 this=this, expression=self._parse_assignment() 5648 ), 5649 comments=comments, 5650 ) 5651 5652 return this 5653 5654 def _parse_disjunction(self) -> exp.Expr | None: 5655 this = self._parse_conjunction() 5656 while self._match_set(self.DISJUNCTION): 5657 comments = self._prev_comments 5658 this = self.expression( 5659 self.DISJUNCTION[self._prev.token_type]( 5660 this=this, expression=self._parse_conjunction() 5661 ), 5662 comments=comments, 5663 ) 5664 return this 5665 5666 def _parse_conjunction(self) -> exp.Expr | None: 5667 this = self._parse_equality() 5668 while self._match_set(self.CONJUNCTION): 5669 comments = self._prev_comments 5670 this = self.expression( 5671 self.CONJUNCTION[self._prev.token_type]( 5672 this=this, expression=self._parse_equality() 5673 ), 5674 comments=comments, 5675 ) 5676 return this 5677 5678 def _parse_equality(self) -> exp.Expr | None: 5679 this = self._parse_comparison() 5680 while self._match_set(self.EQUALITY): 5681 comments = self._prev_comments 5682 this = self.expression( 5683 self.EQUALITY[self._prev.token_type]( 5684 this=this, expression=self._parse_comparison() 5685 ), 5686 comments=comments, 5687 ) 5688 return this 5689 5690 def _parse_comparison(self) -> exp.Expr | None: 5691 this = self._parse_range() 5692 while self._match_set(self.COMPARISON): 5693 comments = self._prev_comments 5694 this = self.expression( 5695 self.COMPARISON[self._prev.token_type](this=this, expression=self._parse_range()), 5696 comments=comments, 5697 ) 5698 return this 5699 5700 def _parse_range(self, this: exp.Expr | None = None) -> exp.Expr | None: 5701 this = this or self._parse_bitwise() 5702 negate = self._match(TokenType.NOT) 5703 5704 if self._match_set(self.RANGE_PARSERS): 5705 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 5706 if not expression: 5707 return this 5708 5709 this = expression 5710 elif self._match(TokenType.ISNULL) or (negate and self._match(TokenType.NULL)): 5711 this = self.expression(exp.Is(this=this, expression=exp.Null())) 5712 5713 # Postgres supports ISNULL and NOTNULL for conditions. 5714 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 5715 if self._match(TokenType.NOTNULL): 5716 this = self.expression(exp.Is(this=this, expression=exp.Null())) 5717 this = self.expression(exp.Not(this=this)) 5718 5719 if negate: 5720 this = self._negate_range(this) 5721 5722 if self._match(TokenType.IS): 5723 this = self._parse_is(this) 5724 5725 return this 5726 5727 def _negate_range(self, this: exp.Expr | None = None) -> exp.Expr | None: 5728 if not this: 5729 return this 5730 5731 return self.expression(exp.Not(this=this)) 5732 5733 def _parse_is(self, this: exp.Expr | None) -> exp.Expr | None: 5734 index = self._index - 1 5735 negate = self._match(TokenType.NOT) 5736 5737 if self._match_text_seq("DISTINCT", "FROM"): 5738 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5739 return self.expression(klass(this=this, expression=self._parse_bitwise())) 5740 5741 if self._match(TokenType.JSON): 5742 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5743 5744 if self._match_text_seq("WITH"): 5745 _with = True 5746 elif self._match_text_seq("WITHOUT"): 5747 _with = False 5748 else: 5749 _with = None 5750 5751 unique = self._match(TokenType.UNIQUE) 5752 self._match_text_seq("KEYS") 5753 expression: exp.Expr | None = self.expression( 5754 exp.JSON(this=kind, with_=_with, unique=unique) 5755 ) 5756 else: 5757 expression = self._parse_null() or self._parse_bitwise() 5758 if not expression: 5759 self._retreat(index) 5760 return None 5761 5762 this = self.expression(exp.Is(this=this, expression=expression)) 5763 this = self.expression(exp.Not(this=this)) if negate else this 5764 return self._parse_column_ops(this) 5765 5766 def _parse_in(self, this: exp.Expr | None, alias: bool = False) -> exp.In: 5767 unnest = self._parse_unnest(with_alias=False) 5768 if unnest: 5769 this = self.expression(exp.In(this=this, unnest=unnest)) 5770 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5771 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5772 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5773 5774 if len(expressions) == 1 and isinstance(query := expressions[0], exp.Query): 5775 this = self.expression( 5776 exp.In(this=this, query=self._parse_query_modifiers(query).subquery(copy=False)) 5777 ) 5778 else: 5779 this = self.expression(exp.In(this=this, expressions=expressions)) 5780 5781 if matched_l_paren: 5782 self._match_r_paren(this) 5783 elif not self._match(TokenType.R_BRACKET, expression=this): 5784 self.raise_error("Expecting ]") 5785 else: 5786 this = self.expression(exp.In(this=this, field=self._parse_column())) 5787 5788 return this 5789 5790 def _parse_between(self, this: exp.Expr | None) -> exp.Between: 5791 symmetric = None 5792 if self._match_text_seq("SYMMETRIC"): 5793 symmetric = True 5794 elif self._match_text_seq("ASYMMETRIC"): 5795 symmetric = False 5796 5797 low = self._parse_bitwise() 5798 self._match(TokenType.AND) 5799 high = self._parse_bitwise() 5800 5801 return self.expression(exp.Between(this=this, low=low, high=high, symmetric=symmetric)) 5802 5803 def _parse_escape(self, this: exp.Expr | None) -> exp.Expr | None: 5804 if not self._match(TokenType.ESCAPE): 5805 return this 5806 return self.expression( 5807 exp.Escape(this=this, expression=self._parse_string() or self._parse_null()) 5808 ) 5809 5810 def _parse_interval_span(self, this: exp.Expr) -> exp.Interval: 5811 # handle day-time format interval span with omitted units: 5812 # INTERVAL '<number days> hh[:][mm[:ss[.ff]]]' <maybe `unit TO unit`> 5813 interval_span_units_omitted = None 5814 if ( 5815 this 5816 and this.is_string 5817 and self.SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT 5818 and exp.INTERVAL_DAY_TIME_RE.match(this.name) 5819 ): 5820 index = self._index 5821 5822 # Var "TO" Var 5823 first_unit = self._parse_var(any_token=True, upper=True) 5824 second_unit = None 5825 if first_unit and self._match_text_seq("TO"): 5826 second_unit = self._parse_var(any_token=True, upper=True) 5827 5828 interval_span_units_omitted = not (first_unit and second_unit) 5829 5830 self._retreat(index) 5831 5832 unit = ( 5833 None 5834 if interval_span_units_omitted 5835 else ( 5836 self._parse_function() 5837 or ( 5838 not self._match_set((TokenType.ALIAS, TokenType.DCOLON), advance=False) 5839 and self._parse_var(any_token=True, upper=True) 5840 ) 5841 ) 5842 ) 5843 5844 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5845 # each INTERVAL expression into this canonical form so it's easy to transpile 5846 if this and this.is_number: 5847 this = exp.Literal.string(this.to_py()) 5848 elif this and this.is_string: 5849 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5850 if parts and unit: 5851 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5852 unit = None 5853 self._retreat(self._index - 1) 5854 5855 if len(parts) == 1: 5856 this = exp.Literal.string(parts[0][0]) 5857 unit = self.expression(exp.Var(this=parts[0][1].upper())) 5858 5859 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5860 unit = self.expression( 5861 exp.IntervalSpan( 5862 this=unit, 5863 expression=self._parse_function() 5864 or self._parse_var(any_token=True, upper=True), 5865 ) 5866 ) 5867 5868 return self.expression(exp.Interval(this=this, unit=unit)) 5869 5870 def _parse_interval(self, require_interval: bool = True) -> exp.Add | exp.Interval | None: 5871 index = self._index 5872 5873 if not self._match(TokenType.INTERVAL) and require_interval: 5874 return None 5875 5876 if self._match(TokenType.STRING, advance=False): 5877 this = self._parse_primary() 5878 else: 5879 this = self._parse_term() 5880 5881 if not this or ( 5882 isinstance(this, exp.Column) 5883 and not this.table 5884 and not this.this.quoted 5885 and self._curr 5886 and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS 5887 ): 5888 self._retreat(index) 5889 return None 5890 5891 interval = self._parse_interval_span(this) 5892 5893 index = self._index 5894 self._match(TokenType.PLUS) 5895 5896 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 5897 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5898 return self.expression(exp.Add(this=interval, expression=self._parse_interval(False))) 5899 5900 self._retreat(index) 5901 return interval 5902 5903 def _parse_bitwise(self) -> exp.Expr | None: 5904 this = self._parse_term() 5905 5906 while True: 5907 if self._match_set(self.BITWISE): 5908 this = self.expression( 5909 self.BITWISE[self._prev.token_type](this=this, expression=self._parse_term()) 5910 ) 5911 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5912 this = self.expression( 5913 exp.DPipe( 5914 this=this, 5915 expression=self._parse_term(), 5916 safe=not self.dialect.STRICT_STRING_CONCAT, 5917 ) 5918 ) 5919 elif self._match(TokenType.DQMARK): 5920 this = self.expression( 5921 exp.Coalesce(this=this, expressions=ensure_list(self._parse_term())) 5922 ) 5923 elif self._match_pair(TokenType.LT, TokenType.LT): 5924 this = self.expression( 5925 exp.BitwiseLeftShift(this=this, expression=self._parse_term()) 5926 ) 5927 elif self._match_pair(TokenType.GT, TokenType.GT): 5928 this = self.expression( 5929 exp.BitwiseRightShift(this=this, expression=self._parse_term()) 5930 ) 5931 else: 5932 break 5933 5934 return this 5935 5936 def _parse_term(self) -> exp.Expr | None: 5937 this = self._parse_factor() 5938 5939 while self._match_set(self.TERM): 5940 klass = self.TERM[self._prev.token_type] 5941 comments = self._prev_comments 5942 expression = self._parse_factor() 5943 5944 this = self.expression(klass(this=this, expression=expression), comments=comments) 5945 5946 if isinstance(this, exp.Collate): 5947 expr = this.expression 5948 5949 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5950 # fallback to Identifier / Var 5951 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5952 ident = expr.this 5953 if isinstance(ident, exp.Identifier): 5954 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5955 5956 return this 5957 5958 def _parse_factor(self) -> exp.Expr | None: 5959 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5960 this = self._parse_at_time_zone(parse_method()) 5961 5962 while self._match_set(self.FACTOR): 5963 klass = self.FACTOR[self._prev.token_type] 5964 comments = self._prev_comments 5965 expression = parse_method() 5966 5967 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5968 self._retreat(self._index - 1) 5969 return this 5970 5971 this = self.expression(klass(this=this, expression=expression), comments=comments) 5972 5973 if isinstance(this, exp.Div): 5974 this.set("typed", self.dialect.TYPED_DIVISION) 5975 this.set("safe", self.dialect.SAFE_DIVISION) 5976 5977 return this 5978 5979 def _parse_exponent(self) -> exp.Expr | None: 5980 this = self._parse_unary() 5981 while self._match_set(self.EXPONENT): 5982 comments = self._prev_comments 5983 this = self.expression( 5984 self.EXPONENT[self._prev.token_type](this=this, expression=self._parse_unary()), 5985 comments=comments, 5986 ) 5987 return this 5988 5989 def _parse_unary(self) -> exp.Expr | None: 5990 if self._match_set(self.UNARY_PARSERS): 5991 return self.UNARY_PARSERS[self._prev.token_type](self) 5992 return self._parse_type() 5993 5994 def _parse_type( 5995 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5996 ) -> exp.Expr | None: 5997 if not fallback_to_identifier and (atom := self._parse_atom()) is not None: 5998 return atom 5999 6000 if interval := parse_interval and self._parse_interval(): 6001 return self._parse_column_ops(interval) 6002 6003 index = self._index 6004 data_type = self._parse_types(check_func=True, allow_identifiers=False) 6005 6006 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 6007 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 6008 if isinstance(data_type, exp.Cast): 6009 # This constructor can contain ops directly after it, for instance struct unnesting: 6010 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 6011 return self._parse_column_ops(data_type) 6012 6013 if data_type: 6014 index2 = self._index 6015 this = self._parse_primary() 6016 6017 if isinstance(this, exp.Literal): 6018 literal = this.name 6019 this = self._parse_column_ops(this) 6020 6021 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 6022 if parser: 6023 return parser(self, this, data_type) 6024 6025 if ( 6026 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 6027 and data_type.is_type(exp.DType.TIMESTAMP) 6028 and TIME_ZONE_RE.search(literal) 6029 ): 6030 data_type = exp.DType.TIMESTAMPTZ.into_expr() 6031 6032 return self.expression(exp.Cast(this=this, to=data_type)) 6033 6034 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 6035 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 6036 # 6037 # If the index difference here is greater than 1, that means the parser itself must have 6038 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 6039 # 6040 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 6041 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 6042 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 6043 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 6044 # 6045 # In these cases, we don't really want to return the converted type, but instead retreat 6046 # and try to parse a Column or Identifier in the section below. 6047 if data_type.expressions and index2 - index > 1: 6048 self._retreat(index2) 6049 return self._parse_column_ops(data_type) 6050 6051 self._retreat(index) 6052 6053 if fallback_to_identifier: 6054 return self._parse_id_var() 6055 6056 return self._parse_column() 6057 6058 def _parse_type_size(self) -> exp.DataTypeParam | None: 6059 this = self._parse_type() 6060 if not this: 6061 return None 6062 6063 if isinstance(this, exp.Column) and not this.table: 6064 this = exp.var(this.name.upper()) 6065 6066 return self.expression( 6067 exp.DataTypeParam(this=this, expression=self._parse_var(any_token=True)) 6068 ) 6069 6070 def _parse_user_defined_type(self, identifier: exp.Identifier) -> exp.Expr | None: 6071 type_name = identifier.name 6072 6073 while self._match(TokenType.DOT): 6074 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 6075 6076 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 6077 6078 def _parse_types( 6079 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 6080 ) -> exp.Expr | None: 6081 index = self._index 6082 this: exp.Expr | None = None 6083 6084 if self._match_set(self.TYPE_TOKENS): 6085 type_token = self._prev.token_type 6086 else: 6087 type_token = None 6088 identifier = allow_identifiers and self._parse_id_var( 6089 any_token=False, tokens=(TokenType.VAR,) 6090 ) 6091 if isinstance(identifier, exp.Identifier): 6092 try: 6093 tokens = self.dialect.tokenize(identifier.name) 6094 except TokenError: 6095 tokens = None 6096 6097 if tokens and (type_token := tokens[0].token_type) in self.TYPE_TOKENS: 6098 if len(tokens) > 1: 6099 return exp.DataType.build(identifier.name, dialect=self.dialect) 6100 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 6101 this = self._parse_user_defined_type(identifier) 6102 else: 6103 self._retreat(self._index - 1) 6104 return None 6105 else: 6106 return None 6107 6108 if type_token == TokenType.PSEUDO_TYPE: 6109 return self.expression(exp.PseudoType(this=self._prev.text.upper())) 6110 6111 if type_token == TokenType.OBJECT_IDENTIFIER: 6112 return self.expression(exp.ObjectIdentifier(this=self._prev.text.upper())) 6113 6114 # https://materialize.com/docs/sql/types/map/ 6115 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 6116 key_type = self._parse_types( 6117 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 6118 ) 6119 if not self._match(TokenType.FARROW): 6120 self._retreat(index) 6121 return None 6122 6123 value_type = self._parse_types( 6124 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 6125 ) 6126 if not self._match(TokenType.R_BRACKET): 6127 self._retreat(index) 6128 return None 6129 6130 return exp.DataType( 6131 this=exp.DType.MAP, 6132 expressions=[key_type, value_type], 6133 nested=True, 6134 ) 6135 6136 nested = type_token in self.NESTED_TYPE_TOKENS 6137 is_struct = type_token in self.STRUCT_TYPE_TOKENS 6138 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 6139 expressions = None 6140 maybe_func = False 6141 6142 if self._match(TokenType.L_PAREN): 6143 if is_struct: 6144 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 6145 elif nested: 6146 expressions = self._parse_csv( 6147 lambda: self._parse_types( 6148 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 6149 ) 6150 ) 6151 if type_token == TokenType.NULLABLE and len(expressions) == 1: 6152 this = expressions[0] 6153 this.set("nullable", True) 6154 self._match_r_paren() 6155 return this 6156 elif type_token in self.ENUM_TYPE_TOKENS: 6157 expressions = self._parse_csv(self._parse_equality) 6158 elif type_token == TokenType.JSON: 6159 # ClickHouse JSON type supports arguments: JSON(col Type, SKIP col, param=value) 6160 # https://clickhouse.com/docs/sql-reference/data-types/newjson 6161 expressions = self._parse_csv(self._parse_json_type_arg) 6162 elif is_aggregate: 6163 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 6164 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 6165 ) 6166 if not func_or_ident: 6167 return None 6168 expressions = [func_or_ident] 6169 if self._match(TokenType.COMMA): 6170 expressions.extend( 6171 self._parse_csv( 6172 lambda: self._parse_types( 6173 check_func=check_func, 6174 schema=schema, 6175 allow_identifiers=allow_identifiers, 6176 ) 6177 ) 6178 ) 6179 else: 6180 expressions = self._parse_csv(self._parse_type_size) 6181 6182 # https://docs.snowflake.com/en/sql-reference/data-types-vector 6183 if type_token == TokenType.VECTOR and len(expressions) == 2: 6184 expressions = self._parse_vector_expressions(expressions) 6185 6186 if not self._match(TokenType.R_PAREN): 6187 self._retreat(index) 6188 return None 6189 6190 maybe_func = True 6191 6192 values: list[exp.Expr] | None = None 6193 6194 if nested and self._match(TokenType.LT): 6195 if is_struct: 6196 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 6197 else: 6198 expressions = self._parse_csv( 6199 lambda: self._parse_types( 6200 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 6201 ) 6202 ) 6203 6204 if not self._match(TokenType.GT): 6205 self.raise_error("Expecting >") 6206 6207 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 6208 values = self._parse_csv(self._parse_disjunction) 6209 if not values and is_struct: 6210 values = None 6211 self._retreat(self._index - 1) 6212 else: 6213 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 6214 6215 if type_token in self.TIMESTAMPS: 6216 if self._match_text_seq("WITH", "TIME", "ZONE"): 6217 maybe_func = False 6218 tz_type = exp.DType.TIMETZ if type_token in self.TIMES else exp.DType.TIMESTAMPTZ 6219 this = exp.DataType(this=tz_type, expressions=expressions) 6220 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 6221 maybe_func = False 6222 this = exp.DataType(this=exp.DType.TIMESTAMPLTZ, expressions=expressions) 6223 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 6224 maybe_func = False 6225 elif type_token == TokenType.INTERVAL: 6226 if self._curr.text.upper() in self.dialect.VALID_INTERVAL_UNITS: 6227 unit = self._parse_var(upper=True) 6228 if self._match_text_seq("TO"): 6229 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 6230 6231 this = self.expression(exp.DataType(this=self.expression(exp.Interval(unit=unit)))) 6232 else: 6233 this = self.expression(exp.DataType(this=exp.DType.INTERVAL)) 6234 elif type_token == TokenType.VOID: 6235 this = exp.DataType(this=exp.DType.NULL) 6236 6237 if maybe_func and check_func: 6238 index2 = self._index 6239 peek = self._parse_string() 6240 6241 if not peek: 6242 self._retreat(index) 6243 return None 6244 6245 self._retreat(index2) 6246 6247 if not this: 6248 assert type_token is not None 6249 if self._match_text_seq("UNSIGNED"): 6250 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 6251 if not unsigned_type_token: 6252 self.raise_error(f"Cannot convert {type_token.name} to unsigned.") 6253 6254 type_token = unsigned_type_token or type_token 6255 6256 # NULLABLE without parentheses can be a column (Presto/Trino) 6257 if type_token == TokenType.NULLABLE and not expressions: 6258 self._retreat(index) 6259 return None 6260 6261 this = exp.DataType( 6262 this=exp.DType[type_token.name], 6263 expressions=expressions, 6264 nested=nested, 6265 ) 6266 6267 # Empty arrays/structs are allowed 6268 if values is not None: 6269 cls = exp.Struct if is_struct else exp.Array 6270 this = exp.cast(cls(expressions=values), this, copy=False) 6271 6272 elif expressions: 6273 this.set("expressions", expressions) 6274 6275 # https://materialize.com/docs/sql/types/list/#type-name 6276 while self._match(TokenType.LIST): 6277 this = exp.DataType(this=exp.DType.LIST, expressions=[this], nested=True) 6278 6279 index = self._index 6280 6281 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 6282 matched_array = self._match(TokenType.ARRAY) 6283 6284 while self._curr: 6285 datatype_token = self._prev.token_type 6286 matched_l_bracket = self._match(TokenType.L_BRACKET) 6287 6288 if (not matched_l_bracket and not matched_array) or ( 6289 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 6290 ): 6291 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 6292 # not to be confused with the fixed size array parsing 6293 break 6294 6295 matched_array = False 6296 values = self._parse_csv(self._parse_disjunction) or None 6297 if ( 6298 values 6299 and not schema 6300 and ( 6301 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS 6302 or datatype_token == TokenType.ARRAY 6303 or not self._match(TokenType.R_BRACKET, advance=False) 6304 ) 6305 ): 6306 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 6307 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 6308 self._retreat(index) 6309 break 6310 6311 this = exp.DataType( 6312 this=exp.DType.ARRAY, expressions=[this], values=values, nested=True 6313 ) 6314 self._match(TokenType.R_BRACKET) 6315 6316 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DType): 6317 converter = self.TYPE_CONVERTERS.get(this.this) 6318 if converter: 6319 this = converter(t.cast(exp.DataType, this)) 6320 6321 return this 6322 6323 def _parse_json_type_arg(self) -> exp.Expr | None: 6324 """Parse a single argument to ClickHouse's JSON type.""" 6325 6326 # SKIP col or SKIP REGEXP 'pattern' 6327 if self._match_text_seq("SKIP"): 6328 regexp = self._match(TokenType.RLIKE) 6329 arg = self._parse_column() 6330 if isinstance(arg, exp.Column): 6331 arg = arg.to_dot() 6332 return self.expression(exp.SkipJSONColumn(regexp=regexp, expression=arg)) 6333 6334 param_or_col = self._parse_column() 6335 if not isinstance(param_or_col, exp.Column): 6336 return None 6337 6338 # Parameter: name=value (e.g., max_dynamic_paths=2) 6339 if len(param_or_col.parts) == 1 and self._match(TokenType.EQ): 6340 param = param_or_col.name 6341 value = self._parse_primary() 6342 return self.expression(exp.EQ(this=exp.var(param), expression=value)) 6343 6344 # Column type hint: col_name Type 6345 col = param_or_col.to_dot() 6346 kind = self._parse_types(check_func=False, allow_identifiers=False) 6347 return self.expression(exp.ColumnDef(this=col, kind=kind)) 6348 6349 def _parse_vector_expressions(self, expressions: list[exp.Expr]) -> list[exp.Expr]: 6350 return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]] 6351 6352 def _parse_struct_types(self, type_required: bool = False) -> exp.Expr | None: 6353 index = self._index 6354 6355 if ( 6356 self._curr 6357 and self._next 6358 and self._curr.token_type in self.TYPE_TOKENS 6359 and self._next.token_type in self.TYPE_TOKENS 6360 ): 6361 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 6362 # type token. Without this, the list will be parsed as a type and we'll eventually crash 6363 this = self._parse_id_var() 6364 else: 6365 this = ( 6366 self._parse_type(parse_interval=False, fallback_to_identifier=True) 6367 or self._parse_id_var() 6368 ) 6369 6370 self._match(TokenType.COLON) 6371 6372 if ( 6373 type_required 6374 and not isinstance(this, exp.DataType) 6375 and not self._match_set(self.TYPE_TOKENS, advance=False) 6376 ): 6377 self._retreat(index) 6378 return self._parse_types() 6379 6380 return self._parse_column_def(this) 6381 6382 def _parse_at_time_zone(self, this: exp.Expr | None) -> exp.Expr | None: 6383 if not self._match_text_seq("AT", "TIME", "ZONE"): 6384 return this 6385 return self._parse_at_time_zone( 6386 self.expression(exp.AtTimeZone(this=this, zone=self._parse_unary())) 6387 ) 6388 6389 def _parse_atom(self) -> exp.Expr | None: 6390 if ( 6391 self._curr.token_type in self.IDENTIFIER_TOKENS 6392 and (column := self._parse_column()) is not None 6393 ): 6394 return column 6395 6396 token = self._curr 6397 token_type = token.token_type 6398 6399 if not (primary_parser := self.PRIMARY_PARSERS.get(token_type)): 6400 return None 6401 6402 next_type = self._next.token_type 6403 6404 if ( 6405 next_type in self.COLUMN_OPERATORS 6406 or next_type in self.COLUMN_POSTFIX_TOKENS 6407 or (token_type == TokenType.STRING and next_type == TokenType.STRING) 6408 ): 6409 return None 6410 6411 self._advance() 6412 return primary_parser(self, token) 6413 6414 def _parse_column(self) -> exp.Expr | None: 6415 column: exp.Expr | None = self._parse_column_parts_fast() 6416 if column is None: 6417 this = self._parse_column_reference() 6418 if not this: 6419 this = self._parse_bracket(this) 6420 column = self._parse_column_ops(this) if this else this 6421 6422 if column: 6423 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS: 6424 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 6425 if self.COLON_IS_VARIANT_EXTRACT: 6426 column = self._parse_colon_as_variant_extract(column) 6427 6428 return column 6429 6430 def _parse_column_parts_fast(self) -> exp.Column | exp.Dot | None: 6431 """Fast path for simple column and dot references (a, a.b, ...). 6432 6433 Greedily consumes VAR/IDENTIFIER tokens separated by DOTs, then checks 6434 that nothing complex follows. If it does, retreats and returns None so 6435 the slow path can handle it. For >4 parts, wraps in exp.Dot nodes. 6436 """ 6437 index = self._index 6438 parts: list[exp.Identifier] | None = None 6439 all_comments: list[str] | None = None 6440 6441 while self._match_set(self.IDENTIFIER_TOKENS): 6442 token = self._prev 6443 comments = self._prev_comments 6444 6445 if parts is None and token.text.upper() in self.NO_PAREN_FUNCTION_PARSERS: 6446 self._retreat(index) 6447 return None 6448 6449 has_dot = self._match(TokenType.DOT) 6450 curr_tt = self._curr.token_type 6451 6452 if not has_dot: 6453 if curr_tt in self.COLUMN_OPERATORS or curr_tt in self.COLUMN_POSTFIX_TOKENS: 6454 self._retreat(index) 6455 return None 6456 elif curr_tt not in self.IDENTIFIER_TOKENS: 6457 self._retreat(index) 6458 return None 6459 6460 if parts is None: 6461 parts = [] 6462 6463 if comments: 6464 if all_comments is None: 6465 all_comments = [] 6466 all_comments.extend(comments) 6467 self._prev_comments = [] 6468 6469 parts.append( 6470 self.expression( 6471 exp.Identifier( 6472 this=token.text, quoted=token.token_type == TokenType.IDENTIFIER 6473 ), 6474 token, 6475 ) 6476 ) 6477 6478 if not has_dot: 6479 break 6480 6481 if parts is None: 6482 return None 6483 6484 n = len(parts) 6485 6486 if n == 1: 6487 column: exp.Column | exp.Dot = exp.Column(this=parts[0]) 6488 elif n == 2: 6489 column = exp.Column(this=parts[1], table=parts[0]) 6490 elif n == 3: 6491 column = exp.Column(this=parts[2], table=parts[1], db=parts[0]) 6492 else: 6493 column = exp.Column(this=parts[3], table=parts[2], db=parts[1], catalog=parts[0]) 6494 6495 for i in range(4, n): 6496 column = exp.Dot(this=column, expression=parts[i]) 6497 6498 if all_comments: 6499 column.add_comments(all_comments) 6500 6501 return column 6502 6503 def _parse_column_reference(self) -> exp.Expr | None: 6504 this = self._parse_field() 6505 if ( 6506 not this 6507 and self._match(TokenType.VALUES, advance=False) 6508 and self.VALUES_FOLLOWED_BY_PAREN 6509 and (not self._next or self._next.token_type != TokenType.L_PAREN) 6510 ): 6511 this = self._parse_id_var() 6512 6513 if isinstance(this, exp.Identifier): 6514 # We bubble up comments from the Identifier to the Column 6515 this = self.expression(exp.Column(this=this), comments=this.pop_comments()) 6516 6517 return this 6518 6519 def _build_json_extract( 6520 self, 6521 this: exp.Expr | None, 6522 path_parts: list[exp.JSONPathPart], 6523 escape: bool | None, 6524 ) -> tuple[exp.Expr | None, list[exp.JSONPathPart]]: 6525 if len(path_parts) > 1: 6526 this = self.expression( 6527 exp.JSONExtract( 6528 this=this, 6529 expression=exp.JSONPath(expressions=path_parts, escape=escape), 6530 variant_extract=True, 6531 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 6532 ) 6533 ) 6534 path_parts = [exp.JSONPathRoot()] 6535 6536 return this, path_parts 6537 6538 def _parse_colon_as_variant_extract(self, this: exp.Expr | None) -> exp.Expr | None: 6539 path_parts: list[exp.JSONPathPart] = [exp.JSONPathRoot()] 6540 escape = None 6541 6542 while self._match(TokenType.COLON): 6543 key = self._parse_id_var(any_token=True, tokens=(TokenType.SELECT,)) 6544 6545 if key: 6546 if isinstance(key, exp.Identifier) and key.quoted: 6547 escape = True 6548 path_parts.append(exp.JSONPathKey(this=key.name)) 6549 6550 while True: 6551 if self._match(TokenType.DOT): 6552 next_key = self._parse_id_var(any_token=True, tokens=(TokenType.SELECT,)) 6553 6554 if next_key: 6555 if isinstance(next_key, exp.Identifier) and next_key.quoted: 6556 escape = True 6557 path_parts.append(exp.JSONPathKey(this=next_key.name)) 6558 elif self._match(TokenType.L_BRACKET): 6559 bracket_expr = self._parse_bracket_key_value() 6560 6561 if not self._match(TokenType.R_BRACKET): 6562 self.raise_error("Expected ]") 6563 6564 if bracket_expr: 6565 if bracket_expr.is_string: 6566 path_parts.append(exp.JSONPathKey(this=bracket_expr.name)) 6567 escape = True 6568 elif bracket_expr.is_star: 6569 path_parts.append(exp.JSONPathSubscript(this=exp.JSONPathWildcard())) 6570 elif bracket_expr.is_number: 6571 path_parts.append(exp.JSONPathSubscript(this=bracket_expr.to_py())) 6572 else: 6573 this, path_parts = self._build_json_extract(this, path_parts, escape) 6574 escape = None 6575 6576 this = self.expression( 6577 exp.Bracket( 6578 this=this, expressions=[bracket_expr], json_access=True 6579 ), 6580 ) 6581 6582 elif self._match(TokenType.DCOLON): 6583 this, path_parts = self._build_json_extract(this, path_parts, escape) 6584 escape = None 6585 6586 cast_type = self._parse_types() 6587 if cast_type: 6588 this = self.expression(exp.Cast(this=this, to=cast_type)) 6589 else: 6590 self.raise_error("Expected type after '::'") 6591 else: 6592 break 6593 6594 this, _ = self._build_json_extract(this, path_parts, escape) 6595 6596 return this 6597 6598 def _parse_dcolon(self) -> exp.Expr | None: 6599 return self._parse_types() 6600 6601 def _parse_column_ops(self, this: exp.Expr | None) -> exp.Expr | None: 6602 while self._curr.token_type in self.BRACKETS: 6603 this = self._parse_bracket(this) 6604 6605 column_operators = self.COLUMN_OPERATORS 6606 cast_column_operators = self.CAST_COLUMN_OPERATORS 6607 while self._curr: 6608 op_token = self._curr.token_type 6609 6610 if op_token not in column_operators: 6611 break 6612 op = column_operators[op_token] 6613 self._advance() 6614 6615 if op_token in cast_column_operators: 6616 field = self._parse_dcolon() 6617 if not field: 6618 self.raise_error("Expected type") 6619 elif op and self._curr: 6620 field = self._parse_column_reference() or self._parse_bitwise() 6621 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 6622 field = self._parse_column_ops(field) 6623 else: 6624 field = self._parse_field(any_token=True, anonymous_func=True) 6625 6626 # Function calls can be qualified, e.g., x.y.FOO() 6627 # This converts the final AST to a series of Dots leading to the function call 6628 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 6629 if isinstance(field, (exp.Func, exp.Window)) and this: 6630 this = this.transform( 6631 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 6632 ) 6633 6634 if op: 6635 this = op(self, this, field) 6636 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 6637 this = self.expression( 6638 exp.Column( 6639 this=field, 6640 table=this.this, 6641 db=this.args.get("table"), 6642 catalog=this.args.get("db"), 6643 ), 6644 comments=this.comments, 6645 ) 6646 elif isinstance(field, exp.Window): 6647 # Move the exp.Dot's to the window's function 6648 window_func = self.expression(exp.Dot(this=this, expression=field.this)) 6649 field.set("this", window_func) 6650 this = field 6651 else: 6652 this = self.expression(exp.Dot(this=this, expression=field)) 6653 6654 if field and field.comments: 6655 t.cast(exp.Expr, this).add_comments(field.pop_comments()) 6656 6657 this = self._parse_bracket(this) 6658 6659 return this 6660 6661 def _parse_paren(self) -> exp.Expr | None: 6662 if not self._match(TokenType.L_PAREN): 6663 return None 6664 6665 comments = self._prev_comments 6666 query = self._parse_select() 6667 6668 if query: 6669 expressions = [query] 6670 else: 6671 expressions = self._parse_expressions() 6672 6673 this = seq_get(expressions, 0) 6674 6675 if not this and self._match(TokenType.R_PAREN, advance=False): 6676 this = self.expression(exp.Tuple()) 6677 elif isinstance(this, exp.UNWRAPPED_QUERIES): 6678 this = self._parse_subquery(this=this, parse_alias=False) 6679 elif isinstance(this, (exp.Subquery, exp.Values)): 6680 this = self._parse_subquery( 6681 this=self._parse_query_modifiers(self._parse_set_operations(this)), 6682 parse_alias=False, 6683 ) 6684 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 6685 this = self.expression(exp.Tuple(expressions=expressions)) 6686 else: 6687 this = self.expression(exp.Paren(this=this)) 6688 6689 if this: 6690 this.add_comments(comments) 6691 6692 self._match_r_paren(expression=this) 6693 6694 if isinstance(this, exp.Paren) and isinstance(this.this, exp.AggFunc): 6695 return self._parse_window(this) 6696 6697 return this 6698 6699 def _parse_primary(self) -> exp.Expr | None: 6700 if self._match_set(self.PRIMARY_PARSERS): 6701 token_type = self._prev.token_type 6702 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 6703 6704 if token_type == TokenType.STRING: 6705 expressions = [primary] 6706 while self._match(TokenType.STRING): 6707 expressions.append(exp.Literal.string(self._prev.text)) 6708 6709 if len(expressions) > 1: 6710 return self.expression( 6711 exp.Concat(expressions=expressions, coalesce=self.dialect.CONCAT_COALESCE) 6712 ) 6713 6714 return primary 6715 6716 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 6717 return exp.Literal.number(f"0.{self._prev.text}") 6718 6719 return self._parse_paren() 6720 6721 def _parse_field( 6722 self, 6723 any_token: bool = False, 6724 tokens: t.Collection[TokenType] | None = None, 6725 anonymous_func: bool = False, 6726 ) -> exp.Expr | None: 6727 if anonymous_func: 6728 field = ( 6729 self._parse_function(anonymous=anonymous_func, any_token=any_token) 6730 or self._parse_primary() 6731 ) 6732 else: 6733 field = self._parse_primary() or self._parse_function( 6734 anonymous=anonymous_func, any_token=any_token 6735 ) 6736 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 6737 6738 def _parse_function( 6739 self, 6740 functions: dict[str, t.Callable] | None = None, 6741 anonymous: bool = False, 6742 optional_parens: bool = True, 6743 any_token: bool = False, 6744 ) -> exp.Expr | None: 6745 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 6746 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 6747 fn_syntax = False 6748 if ( 6749 self._match(TokenType.L_BRACE, advance=False) 6750 and self._next 6751 and self._next.text.upper() == "FN" 6752 ): 6753 self._advance(2) 6754 fn_syntax = True 6755 6756 func = self._parse_function_call( 6757 functions=functions, 6758 anonymous=anonymous, 6759 optional_parens=optional_parens, 6760 any_token=any_token, 6761 ) 6762 6763 if fn_syntax: 6764 self._match(TokenType.R_BRACE) 6765 6766 return func 6767 6768 def _parse_function_args(self, alias: bool = False) -> list[exp.Expr]: 6769 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 6770 6771 def _parse_function_call( 6772 self, 6773 functions: dict[str, t.Callable] | None = None, 6774 anonymous: bool = False, 6775 optional_parens: bool = True, 6776 any_token: bool = False, 6777 ) -> exp.Expr | None: 6778 if not self._curr: 6779 return None 6780 6781 comments = self._curr.comments 6782 prev = self._prev 6783 token = self._curr 6784 token_type = self._curr.token_type 6785 this: str | exp.Expr = self._curr.text 6786 upper = self._curr.text.upper() 6787 6788 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 6789 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 6790 self._advance() 6791 return self._parse_window(parser(self)) 6792 6793 if self._next.token_type != TokenType.L_PAREN: 6794 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 6795 self._advance() 6796 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]()) 6797 6798 return None 6799 6800 if any_token: 6801 if token_type in self.RESERVED_TOKENS: 6802 return None 6803 elif token_type not in self.FUNC_TOKENS: 6804 return None 6805 6806 self._advance(2) 6807 6808 parser = self.FUNCTION_PARSERS.get(upper) 6809 if parser and not anonymous: 6810 result = parser(self) 6811 else: 6812 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 6813 6814 if subquery_predicate: 6815 expr = None 6816 if self._curr.token_type in self.SUBQUERY_TOKENS: 6817 expr = self._parse_select() 6818 self._match_r_paren() 6819 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 6820 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 6821 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 6822 self._advance(-1) 6823 expr = self._parse_bitwise() 6824 6825 if expr: 6826 return self.expression(subquery_predicate(this=expr), comments=comments) 6827 6828 if functions is None: 6829 functions = self.FUNCTIONS 6830 6831 function = functions.get(upper) 6832 known_function = function and not anonymous 6833 6834 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 6835 args = self._parse_function_args(alias) 6836 6837 post_func_comments = self._curr.comments if self._curr else None 6838 if known_function and post_func_comments: 6839 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 6840 # call we'll construct it as exp.Anonymous, even if it's "known" 6841 if any( 6842 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 6843 for comment in post_func_comments 6844 ): 6845 known_function = False 6846 6847 if alias and known_function: 6848 args = self._kv_to_prop_eq(args) 6849 6850 if known_function: 6851 func_builder = t.cast(t.Callable, function) 6852 6853 # mypyc compiled functions don't have __code__, so we use 6854 # try/except to check if func_builder accepts 'dialect'. 6855 try: 6856 func = func_builder(args) 6857 except TypeError: 6858 func = func_builder(args, dialect=self.dialect) 6859 6860 func = self.validate_expression(func, args) 6861 if self.dialect.PRESERVE_ORIGINAL_NAMES: 6862 func.meta["name"] = this 6863 6864 result = func 6865 else: 6866 if token_type == TokenType.IDENTIFIER: 6867 this = exp.Identifier(this=this, quoted=True).update_positions(token) 6868 6869 result = self.expression(exp.Anonymous(this=this, expressions=args)) 6870 6871 result = result.update_positions(token) 6872 6873 if isinstance(result, exp.Expr): 6874 result.add_comments(comments) 6875 6876 if parser: 6877 self._match(TokenType.R_PAREN, expression=result) 6878 else: 6879 self._match_r_paren(result) 6880 return self._parse_window(result) 6881 6882 def _to_prop_eq(self, expression: exp.Expr, index: int) -> exp.Expr: 6883 return expression 6884 6885 def _kv_to_prop_eq( 6886 self, expressions: list[exp.Expr], parse_map: bool = False 6887 ) -> list[exp.Expr]: 6888 transformed = [] 6889 6890 for index, e in enumerate(expressions): 6891 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 6892 if isinstance(e, exp.Alias): 6893 e = self.expression(exp.PropertyEQ(this=e.args.get("alias"), expression=e.this)) 6894 6895 if not isinstance(e, exp.PropertyEQ): 6896 e = self.expression( 6897 exp.PropertyEQ( 6898 this=e.this if parse_map else exp.to_identifier(e.this.name), 6899 expression=e.expression, 6900 ) 6901 ) 6902 6903 if isinstance(e.this, exp.Column): 6904 e.this.replace(e.this.this) 6905 else: 6906 e = self._to_prop_eq(e, index) 6907 6908 transformed.append(e) 6909 6910 return transformed 6911 6912 def _parse_function_properties(self) -> exp.Properties | None: 6913 return self._parse_properties() 6914 6915 def _parse_user_defined_function_expression(self) -> exp.Expr | None: 6916 return self._parse_statement() 6917 6918 def _parse_function_parameter(self) -> exp.Expr | None: 6919 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 6920 6921 def _parse_user_defined_function(self, kind: TokenType | None = None) -> exp.Expr | None: 6922 this = self._parse_table_parts(schema=True) 6923 6924 if not self._match(TokenType.L_PAREN): 6925 return this 6926 6927 expressions = self._parse_csv(self._parse_function_parameter) 6928 self._match_r_paren() 6929 return self.expression( 6930 exp.UserDefinedFunction(this=this, expressions=expressions, wrapped=True) 6931 ) 6932 6933 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 6934 literal = self._parse_primary() 6935 if literal: 6936 return self.expression(exp.Introducer(this=token.text, expression=literal), token) 6937 6938 return self._identifier_expression(token) 6939 6940 def _parse_session_parameter(self) -> exp.SessionParameter: 6941 kind = None 6942 this = self._parse_id_var() or self._parse_primary() 6943 6944 if this and self._match(TokenType.DOT): 6945 kind = this.name 6946 this = self._parse_var() or self._parse_primary() 6947 6948 return self.expression(exp.SessionParameter(this=this, kind=kind)) 6949 6950 def _parse_lambda_arg(self) -> exp.Expr | None: 6951 return self._parse_id_var() 6952 6953 def _parse_lambda(self, alias: bool = False) -> exp.Expr | None: 6954 next_token_type = self._next.token_type 6955 6956 # Fast path: simple atom (column, literal, null, bool) followed by , or ) 6957 if ( 6958 next_token_type in self.LAMBDA_ARG_TERMINATORS 6959 and (atom := self._parse_atom()) is not None 6960 ): 6961 return atom 6962 6963 index = self._index 6964 6965 if self._match(TokenType.L_PAREN): 6966 expressions = t.cast( 6967 list[t.Optional[exp.Expr]], self._parse_csv(self._parse_lambda_arg) 6968 ) 6969 6970 if not self._match(TokenType.R_PAREN): 6971 self._retreat(index) 6972 elif self._match_set(self.LAMBDAS): 6973 return self.LAMBDAS[self._prev.token_type](self, expressions) 6974 else: 6975 self._retreat(index) 6976 elif self.TYPED_LAMBDA_ARGS or next_token_type in self.LAMBDAS: 6977 expressions = [self._parse_lambda_arg()] 6978 6979 if self._match_set(self.LAMBDAS): 6980 return self.LAMBDAS[self._prev.token_type](self, expressions) 6981 6982 self._retreat(index) 6983 6984 this: exp.Expr | None 6985 6986 if self._match(TokenType.DISTINCT): 6987 this = self.expression( 6988 exp.Distinct(expressions=self._parse_csv(self._parse_disjunction)) 6989 ) 6990 else: 6991 this = self._parse_select_or_expression(alias=alias) 6992 6993 return self._parse_limit( 6994 self._parse_respect_or_ignore_nulls( 6995 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6996 ) 6997 ) 6998 6999 def _parse_schema(self, this: exp.Expr | None = None) -> exp.Expr | None: 7000 index = self._index 7001 if not self._match(TokenType.L_PAREN): 7002 return this 7003 7004 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 7005 # expr can be of both types 7006 if self._match_set(self.SELECT_START_TOKENS): 7007 self._retreat(index) 7008 return this 7009 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 7010 self._match_r_paren() 7011 return self.expression(exp.Schema(this=this, expressions=args)) 7012 7013 def _parse_field_def(self) -> exp.Expr | None: 7014 return self._parse_column_def(self._parse_field(any_token=True)) 7015 7016 def _parse_column_def( 7017 self, this: exp.Expr | None, computed_column: bool = True 7018 ) -> exp.Expr | None: 7019 # column defs are not really columns, they're identifiers 7020 if isinstance(this, exp.Column): 7021 this = this.this 7022 7023 if not computed_column: 7024 self._match(TokenType.ALIAS) 7025 7026 kind = self._parse_types(schema=True) 7027 7028 if self._match_text_seq("FOR", "ORDINALITY"): 7029 return self.expression(exp.ColumnDef(this=this, ordinality=True)) 7030 7031 constraints: list[exp.Expr] = [] 7032 7033 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 7034 ("ALIAS", "MATERIALIZED") 7035 ): 7036 persisted = self._prev.text.upper() == "MATERIALIZED" 7037 constraint_kind = exp.ComputedColumnConstraint( 7038 this=self._parse_disjunction(), 7039 persisted=persisted or self._match_text_seq("PERSISTED"), 7040 data_type=exp.Var(this="AUTO") 7041 if self._match_text_seq("AUTO") 7042 else self._parse_types(), 7043 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 7044 ) 7045 constraints.append(self.expression(exp.ColumnConstraint(kind=constraint_kind))) 7046 elif not kind and self._match_set({TokenType.IN, TokenType.OUT}, advance=False): 7047 in_out_constraint = self.expression( 7048 exp.InOutColumnConstraint( 7049 input_=self._match(TokenType.IN), output=self._match(TokenType.OUT) 7050 ) 7051 ) 7052 constraints.append(in_out_constraint) 7053 kind = self._parse_types() 7054 elif ( 7055 kind 7056 and self._match(TokenType.ALIAS, advance=False) 7057 and ( 7058 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 7059 or self._next.token_type == TokenType.L_PAREN 7060 ) 7061 ): 7062 self._advance() 7063 constraints.append( 7064 self.expression( 7065 exp.ColumnConstraint( 7066 kind=exp.ComputedColumnConstraint( 7067 this=self._parse_disjunction(), 7068 persisted=self._match_texts(("STORED", "VIRTUAL")) 7069 and self._prev.text.upper() == "STORED", 7070 ) 7071 ) 7072 ) 7073 ) 7074 7075 while True: 7076 constraint = self._parse_column_constraint() 7077 if not constraint: 7078 break 7079 constraints.append(constraint) 7080 7081 if not kind and not constraints: 7082 return this 7083 7084 return self.expression(exp.ColumnDef(this=this, kind=kind, constraints=constraints)) 7085 7086 def _parse_auto_increment( 7087 self, 7088 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 7089 start = None 7090 increment = None 7091 order = None 7092 7093 if self._match(TokenType.L_PAREN, advance=False): 7094 args = self._parse_wrapped_csv(self._parse_bitwise) 7095 start = seq_get(args, 0) 7096 increment = seq_get(args, 1) 7097 elif self._match_text_seq("START"): 7098 start = self._parse_bitwise() 7099 self._match_text_seq("INCREMENT") 7100 increment = self._parse_bitwise() 7101 if self._match_text_seq("ORDER"): 7102 order = True 7103 elif self._match_text_seq("NOORDER"): 7104 order = False 7105 7106 if start and increment: 7107 return exp.GeneratedAsIdentityColumnConstraint( 7108 start=start, increment=increment, this=False, order=order 7109 ) 7110 7111 return exp.AutoIncrementColumnConstraint() 7112 7113 def _parse_check_constraint(self) -> exp.CheckColumnConstraint | None: 7114 if not self._match(TokenType.L_PAREN, advance=False): 7115 return None 7116 7117 return self.expression( 7118 exp.CheckColumnConstraint( 7119 this=self._parse_wrapped(self._parse_assignment), 7120 enforced=self._match_text_seq("ENFORCED"), 7121 ) 7122 ) 7123 7124 def _parse_auto_property(self) -> exp.AutoRefreshProperty | None: 7125 if not self._match_text_seq("REFRESH"): 7126 self._retreat(self._index - 1) 7127 return None 7128 return self.expression(exp.AutoRefreshProperty(this=self._parse_var(upper=True))) 7129 7130 def _parse_compress(self) -> exp.CompressColumnConstraint: 7131 if self._match(TokenType.L_PAREN, advance=False): 7132 return self.expression( 7133 exp.CompressColumnConstraint(this=self._parse_wrapped_csv(self._parse_bitwise)) 7134 ) 7135 7136 return self.expression(exp.CompressColumnConstraint(this=self._parse_bitwise())) 7137 7138 def _parse_generated_as_identity( 7139 self, 7140 ) -> ( 7141 exp.GeneratedAsIdentityColumnConstraint 7142 | exp.ComputedColumnConstraint 7143 | exp.GeneratedAsRowColumnConstraint 7144 ): 7145 if self._match_text_seq("BY", "DEFAULT"): 7146 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 7147 this = self.expression( 7148 exp.GeneratedAsIdentityColumnConstraint(this=False, on_null=on_null) 7149 ) 7150 else: 7151 self._match_text_seq("ALWAYS") 7152 this = self.expression(exp.GeneratedAsIdentityColumnConstraint(this=True)) 7153 7154 self._match(TokenType.ALIAS) 7155 7156 if self._match_text_seq("ROW"): 7157 start = self._match_text_seq("START") 7158 if not start: 7159 self._match(TokenType.END) 7160 hidden = self._match_text_seq("HIDDEN") 7161 return self.expression(exp.GeneratedAsRowColumnConstraint(start=start, hidden=hidden)) 7162 7163 identity = self._match_text_seq("IDENTITY") 7164 7165 if self._match(TokenType.L_PAREN): 7166 if self._match(TokenType.START_WITH): 7167 this.set("start", self._parse_bitwise()) 7168 if self._match_text_seq("INCREMENT", "BY"): 7169 this.set("increment", self._parse_bitwise()) 7170 if self._match_text_seq("MINVALUE"): 7171 this.set("minvalue", self._parse_bitwise()) 7172 if self._match_text_seq("MAXVALUE"): 7173 this.set("maxvalue", self._parse_bitwise()) 7174 7175 if self._match_text_seq("CYCLE"): 7176 this.set("cycle", True) 7177 elif self._match_text_seq("NO", "CYCLE"): 7178 this.set("cycle", False) 7179 7180 if not identity: 7181 this.set("expression", self._parse_range()) 7182 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 7183 args = self._parse_csv(self._parse_bitwise) 7184 this.set("start", seq_get(args, 0)) 7185 this.set("increment", seq_get(args, 1)) 7186 7187 self._match_r_paren() 7188 7189 return this 7190 7191 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 7192 self._match_text_seq("LENGTH") 7193 return self.expression(exp.InlineLengthColumnConstraint(this=self._parse_bitwise())) 7194 7195 def _parse_not_constraint(self) -> exp.Expr | None: 7196 if self._match_text_seq("NULL"): 7197 return self.expression(exp.NotNullColumnConstraint()) 7198 if self._match_text_seq("CASESPECIFIC"): 7199 return self.expression(exp.CaseSpecificColumnConstraint(not_=True)) 7200 if self._match_text_seq("FOR", "REPLICATION"): 7201 return self.expression(exp.NotForReplicationColumnConstraint()) 7202 7203 # Unconsume the `NOT` token 7204 self._retreat(self._index - 1) 7205 return None 7206 7207 def _parse_column_constraint(self) -> exp.Expr | None: 7208 this = self._parse_id_var() if self._match(TokenType.CONSTRAINT) else None 7209 7210 procedure_option_follows = ( 7211 self._match(TokenType.WITH, advance=False) 7212 and self._next 7213 and self._next.text.upper() in self.PROCEDURE_OPTIONS 7214 ) 7215 7216 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 7217 constraint = self.CONSTRAINT_PARSERS[self._prev.text.upper()](self) 7218 if not constraint: 7219 self._retreat(self._index - 1) 7220 return None 7221 7222 return self.expression(exp.ColumnConstraint(this=this, kind=constraint)) 7223 7224 return this 7225 7226 def _parse_constraint(self) -> exp.Expr | None: 7227 if not self._match(TokenType.CONSTRAINT): 7228 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 7229 7230 return self.expression( 7231 exp.Constraint(this=self._parse_id_var(), expressions=self._parse_unnamed_constraints()) 7232 ) 7233 7234 def _parse_unnamed_constraints(self) -> list[exp.Expr]: 7235 constraints = [] 7236 while True: 7237 constraint = self._parse_unnamed_constraint() or self._parse_function() 7238 if not constraint: 7239 break 7240 constraints.append(constraint) 7241 7242 return constraints 7243 7244 def _parse_unnamed_constraint( 7245 self, constraints: t.Collection[str] | None = None 7246 ) -> exp.Expr | None: 7247 index = self._index 7248 7249 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 7250 constraints or self.CONSTRAINT_PARSERS 7251 ): 7252 return None 7253 7254 constraint_key = self._prev.text.upper() 7255 if constraint_key not in self.CONSTRAINT_PARSERS: 7256 self.raise_error(f"No parser found for schema constraint {constraint_key}.") 7257 7258 result = self.CONSTRAINT_PARSERS[constraint_key](self) 7259 if not result: 7260 self._retreat(index) 7261 7262 return result 7263 7264 def _parse_unique_key(self) -> exp.Expr | None: 7265 if ( 7266 self._curr 7267 and self._curr.token_type != TokenType.IDENTIFIER 7268 and self._curr.text.upper() in self.CONSTRAINT_PARSERS 7269 ): 7270 return None 7271 return self._parse_id_var(any_token=False) 7272 7273 def _parse_unique(self) -> exp.UniqueColumnConstraint: 7274 self._match_texts(("KEY", "INDEX")) 7275 return self.expression( 7276 exp.UniqueColumnConstraint( 7277 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 7278 this=self._parse_schema(self._parse_unique_key()), 7279 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 7280 on_conflict=self._parse_on_conflict(), 7281 options=self._parse_key_constraint_options(), 7282 ) 7283 ) 7284 7285 def _parse_key_constraint_options(self) -> list[str]: 7286 options = [] 7287 while True: 7288 if not self._curr: 7289 break 7290 7291 if self._match(TokenType.ON): 7292 action = None 7293 on = self._advance_any() and self._prev.text 7294 7295 if self._match_text_seq("NO", "ACTION"): 7296 action = "NO ACTION" 7297 elif self._match_text_seq("CASCADE"): 7298 action = "CASCADE" 7299 elif self._match_text_seq("RESTRICT"): 7300 action = "RESTRICT" 7301 elif self._match_pair(TokenType.SET, TokenType.NULL): 7302 action = "SET NULL" 7303 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 7304 action = "SET DEFAULT" 7305 else: 7306 self.raise_error("Invalid key constraint") 7307 7308 options.append(f"ON {on} {action}") 7309 else: 7310 var = self._parse_var_from_options( 7311 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 7312 ) 7313 if not var: 7314 break 7315 options.append(var.name) 7316 7317 return options 7318 7319 def _parse_references(self, match: bool = True) -> exp.Reference | None: 7320 if match and not self._match(TokenType.REFERENCES): 7321 return None 7322 7323 expressions: list | None = None 7324 this = self._parse_table(schema=True) 7325 options = self._parse_key_constraint_options() 7326 return self.expression(exp.Reference(this=this, expressions=expressions, options=options)) 7327 7328 def _parse_foreign_key(self) -> exp.ForeignKey: 7329 expressions = ( 7330 self._parse_wrapped_id_vars() 7331 if not self._match(TokenType.REFERENCES, advance=False) 7332 else None 7333 ) 7334 reference = self._parse_references() 7335 on_options = {} 7336 7337 while self._match(TokenType.ON): 7338 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 7339 self.raise_error("Expected DELETE or UPDATE") 7340 7341 kind = self._prev.text.lower() 7342 7343 if self._match_text_seq("NO", "ACTION"): 7344 action = "NO ACTION" 7345 elif self._match(TokenType.SET): 7346 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 7347 action = "SET " + self._prev.text.upper() 7348 else: 7349 self._advance() 7350 action = self._prev.text.upper() 7351 7352 on_options[kind] = action 7353 7354 return self.expression( 7355 exp.ForeignKey( 7356 expressions=expressions, 7357 reference=reference, 7358 options=self._parse_key_constraint_options(), 7359 **on_options, 7360 ) 7361 ) 7362 7363 def _parse_primary_key_part(self) -> exp.Expr | None: 7364 return self._parse_field() 7365 7366 def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint | None: 7367 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 7368 self._retreat(self._index - 1) 7369 return None 7370 7371 id_vars = self._parse_wrapped_id_vars() 7372 return self.expression( 7373 exp.PeriodForSystemTimeConstraint( 7374 this=seq_get(id_vars, 0), expression=seq_get(id_vars, 1) 7375 ) 7376 ) 7377 7378 def _parse_primary_key( 7379 self, 7380 wrapped_optional: bool = False, 7381 in_props: bool = False, 7382 named_primary_key: bool = False, 7383 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 7384 desc = ( 7385 self._prev.token_type == TokenType.DESC 7386 if self._match_set((TokenType.ASC, TokenType.DESC)) 7387 else None 7388 ) 7389 7390 this = None 7391 if ( 7392 named_primary_key 7393 and self._curr.text.upper() not in self.CONSTRAINT_PARSERS 7394 and self._next 7395 and self._next.token_type == TokenType.L_PAREN 7396 ): 7397 this = self._parse_id_var() 7398 7399 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 7400 return self.expression( 7401 exp.PrimaryKeyColumnConstraint( 7402 desc=desc, options=self._parse_key_constraint_options() 7403 ) 7404 ) 7405 7406 expressions = self._parse_wrapped_csv( 7407 self._parse_primary_key_part, optional=wrapped_optional 7408 ) 7409 7410 return self.expression( 7411 exp.PrimaryKey( 7412 this=this, 7413 expressions=expressions, 7414 include=self._parse_index_params(), 7415 options=self._parse_key_constraint_options(), 7416 ) 7417 ) 7418 7419 def _parse_bracket_key_value(self, is_map: bool = False) -> exp.Expr | None: 7420 return self._parse_slice(self._parse_alias(self._parse_disjunction(), explicit=True)) 7421 7422 def _parse_odbc_datetime_literal(self) -> exp.Expr: 7423 """ 7424 Parses a datetime column in ODBC format. We parse the column into the corresponding 7425 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 7426 same as we did for `DATE('yyyy-mm-dd')`. 7427 7428 Reference: 7429 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 7430 """ 7431 self._match(TokenType.VAR) 7432 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 7433 expression = self.expression(exp_class(this=self._parse_string())) 7434 if not self._match(TokenType.R_BRACE): 7435 self.raise_error("Expected }") 7436 return expression 7437 7438 def _parse_bracket(self, this: exp.Expr | None = None) -> exp.Expr | None: 7439 if not self._match_set(self.BRACKETS): 7440 return this 7441 7442 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 7443 map_token = seq_get(self._tokens, self._index - 2) 7444 parse_map = map_token is not None and map_token.text.upper() == "MAP" 7445 else: 7446 parse_map = False 7447 7448 bracket_kind = self._prev.token_type 7449 if ( 7450 bracket_kind == TokenType.L_BRACE 7451 and self._curr 7452 and self._curr.token_type == TokenType.VAR 7453 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 7454 ): 7455 return self._parse_odbc_datetime_literal() 7456 7457 expressions = self._parse_csv( 7458 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 7459 ) 7460 7461 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 7462 self.raise_error("Expected ]") 7463 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 7464 self.raise_error("Expected }") 7465 7466 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 7467 if bracket_kind == TokenType.L_BRACE: 7468 this = self.expression( 7469 exp.Struct( 7470 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map) 7471 ) 7472 ) 7473 elif not this: 7474 this = build_array_constructor( 7475 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 7476 ) 7477 else: 7478 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 7479 if constructor_type: 7480 return build_array_constructor( 7481 constructor_type, 7482 args=expressions, 7483 bracket_kind=bracket_kind, 7484 dialect=self.dialect, 7485 ) 7486 7487 expressions = apply_index_offset( 7488 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 7489 ) 7490 this = self.expression( 7491 exp.Bracket(this=this, expressions=expressions), comments=this.pop_comments() 7492 ) 7493 7494 self._add_comments(this) 7495 return self._parse_bracket(this) 7496 7497 def _parse_slice(self, this: exp.Expr | None) -> exp.Expr | None: 7498 if not self._match(TokenType.COLON): 7499 return this 7500 7501 if self._match_pair(TokenType.DASH, TokenType.COLON, advance=False): 7502 self._advance() 7503 end: exp.Expr | None = -exp.Literal.number("1") 7504 else: 7505 end = self._parse_assignment() 7506 step = self._parse_unary() if self._match(TokenType.COLON) else None 7507 return self.expression(exp.Slice(this=this, expression=end, step=step)) 7508 7509 def _parse_case(self) -> exp.Expr | None: 7510 if self._match(TokenType.DOT, advance=False): 7511 # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake 7512 self._retreat(self._index - 1) 7513 return None 7514 7515 ifs = [] 7516 default = None 7517 7518 comments = self._prev_comments 7519 expression = self._parse_disjunction() 7520 7521 while self._match(TokenType.WHEN): 7522 this = self._parse_disjunction() 7523 self._match(TokenType.THEN) 7524 then = self._parse_disjunction() 7525 ifs.append(self.expression(exp.If(this=this, true=then))) 7526 7527 if self._match(TokenType.ELSE): 7528 default = self._parse_disjunction() 7529 7530 if not self._match(TokenType.END): 7531 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 7532 default = exp.column("interval") 7533 else: 7534 self.raise_error("Expected END after CASE", self._prev) 7535 7536 return self.expression( 7537 exp.Case(this=expression, ifs=ifs, default=default), comments=comments 7538 ) 7539 7540 def _parse_if(self) -> exp.Expr | None: 7541 if self._match(TokenType.L_PAREN): 7542 args = self._parse_csv( 7543 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 7544 ) 7545 this = self.validate_expression(exp.If.from_arg_list(args), args) 7546 self._match_r_paren() 7547 else: 7548 index = self._index - 1 7549 7550 if self.NO_PAREN_IF_COMMANDS and index == 0: 7551 return self._parse_as_command(self._prev) 7552 7553 condition = self._parse_disjunction() 7554 7555 if not condition: 7556 self._retreat(index) 7557 return None 7558 7559 self._match(TokenType.THEN) 7560 true = self._parse_disjunction() 7561 false = self._parse_disjunction() if self._match(TokenType.ELSE) else None 7562 self._match(TokenType.END) 7563 this = self.expression(exp.If(this=condition, true=true, false=false)) 7564 7565 return this 7566 7567 def _parse_next_value_for(self) -> exp.Expr | None: 7568 if not self._match_text_seq("VALUE", "FOR"): 7569 self._retreat(self._index - 1) 7570 return None 7571 7572 return self.expression( 7573 exp.NextValueFor( 7574 this=self._parse_column(), 7575 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 7576 ) 7577 ) 7578 7579 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 7580 this = self._parse_function() or self._parse_var_or_string(upper=True) 7581 7582 if self._match(TokenType.FROM): 7583 return self.expression(exp.Extract(this=this, expression=self._parse_bitwise())) 7584 7585 if not self._match(TokenType.COMMA): 7586 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 7587 7588 return self.expression(exp.Extract(this=this, expression=self._parse_bitwise())) 7589 7590 def _parse_gap_fill(self) -> exp.GapFill: 7591 self._match(TokenType.TABLE) 7592 this = self._parse_table() 7593 7594 self._match(TokenType.COMMA) 7595 args = [this, *self._parse_csv(self._parse_lambda)] 7596 7597 gap_fill = exp.GapFill.from_arg_list(args) 7598 return self.validate_expression(gap_fill, args) 7599 7600 def _parse_char(self) -> exp.Chr: 7601 return self.expression( 7602 exp.Chr( 7603 expressions=self._parse_csv(self._parse_assignment), 7604 charset=self._match(TokenType.USING) and self._parse_var(), 7605 ) 7606 ) 7607 7608 def _parse_cast(self, strict: bool, safe: bool | None = None) -> exp.Expr: 7609 this = self._parse_assignment() 7610 7611 if not self._match(TokenType.ALIAS): 7612 if self._match(TokenType.COMMA): 7613 return self.expression(exp.CastToStrType(this=this, to=self._parse_string())) 7614 7615 self.raise_error("Expected AS after CAST") 7616 7617 fmt = None 7618 to = self._parse_types() 7619 7620 default = None 7621 if self._match(TokenType.DEFAULT): 7622 default = self._parse_bitwise() 7623 self._match_text_seq("ON", "CONVERSION", "ERROR") 7624 7625 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 7626 fmt_string = self._parse_wrapped(self._parse_string, optional=True) 7627 fmt = self._parse_at_time_zone(fmt_string) 7628 7629 if not to: 7630 to = exp.DType.UNKNOWN.into_expr() 7631 if to.this in exp.DataType.TEMPORAL_TYPES: 7632 this = self.expression( 7633 (exp.StrToDate if to.this == exp.DType.DATE else exp.StrToTime)( 7634 this=this, 7635 format=exp.Literal.string( 7636 format_time( 7637 fmt_string.this if fmt_string else "", 7638 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 7639 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 7640 ) 7641 ), 7642 safe=safe, 7643 ) 7644 ) 7645 7646 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 7647 this.set("zone", fmt.args["zone"]) 7648 return this 7649 elif not to: 7650 self.raise_error("Expected TYPE after CAST") 7651 elif isinstance(to, exp.Identifier): 7652 to = exp.DataType.build(to.name, dialect=self.dialect, udt=True) 7653 elif to.this == exp.DType.CHAR and self._match(TokenType.CHARACTER_SET): 7654 to = exp.DType.CHARACTER_SET.into_expr(kind=self._parse_var_or_string()) 7655 7656 return self.build_cast( 7657 strict=strict, 7658 this=this, 7659 to=to, 7660 format=fmt, 7661 safe=safe, 7662 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 7663 default=default, 7664 ) 7665 7666 def _parse_string_agg(self) -> exp.GroupConcat: 7667 if self._match(TokenType.DISTINCT): 7668 args: list[exp.Expr | None] = [ 7669 self.expression(exp.Distinct(expressions=[self._parse_disjunction()])) 7670 ] 7671 if self._match(TokenType.COMMA): 7672 args.extend(self._parse_csv(self._parse_disjunction)) 7673 else: 7674 args = self._parse_csv(self._parse_disjunction) # type: ignore 7675 7676 if self._match_text_seq("ON", "OVERFLOW"): 7677 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 7678 if self._match_text_seq("ERROR"): 7679 on_overflow: exp.Expr | None = exp.var("ERROR") 7680 else: 7681 self._match_text_seq("TRUNCATE") 7682 on_overflow = self.expression( 7683 exp.OverflowTruncateBehavior( 7684 this=self._parse_string(), 7685 with_count=( 7686 self._match_text_seq("WITH", "COUNT") 7687 or not self._match_text_seq("WITHOUT", "COUNT") 7688 ), 7689 ) 7690 ) 7691 else: 7692 on_overflow = None 7693 7694 index = self._index 7695 if not self._match(TokenType.R_PAREN) and args: 7696 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 7697 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 7698 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 7699 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 7700 return self.expression(exp.GroupConcat(this=args[0], separator=seq_get(args, 1))) 7701 7702 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 7703 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 7704 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 7705 if not self._match_text_seq("WITHIN", "GROUP"): 7706 self._retreat(index) 7707 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 7708 7709 # The corresponding match_r_paren will be called in parse_function (caller) 7710 self._match_l_paren() 7711 7712 return self.expression( 7713 exp.GroupConcat( 7714 this=self._parse_order(this=seq_get(args, 0)), 7715 separator=seq_get(args, 1), 7716 on_overflow=on_overflow, 7717 ) 7718 ) 7719 7720 def _parse_convert(self, strict: bool, safe: bool | None = None) -> exp.Expr | None: 7721 this = self._parse_bitwise() 7722 7723 if self._match(TokenType.USING): 7724 to: exp.Expr | None = exp.DType.CHARACTER_SET.into_expr( 7725 kind=self._parse_var(tokens={TokenType.BINARY}) 7726 ) 7727 elif self._match(TokenType.COMMA): 7728 to = self._parse_types() 7729 else: 7730 to = None 7731 7732 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 7733 7734 def _parse_xml_element(self) -> exp.XMLElement: 7735 if self._match_text_seq("EVALNAME"): 7736 evalname = True 7737 this = self._parse_bitwise() 7738 else: 7739 evalname = None 7740 self._match_text_seq("NAME") 7741 this = self._parse_id_var() 7742 7743 return self.expression( 7744 exp.XMLElement( 7745 this=this, 7746 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_bitwise), 7747 evalname=evalname, 7748 ) 7749 ) 7750 7751 def _parse_xml_table(self) -> exp.XMLTable: 7752 namespaces = None 7753 passing = None 7754 columns = None 7755 7756 if self._match_text_seq("XMLNAMESPACES", "("): 7757 namespaces = self._parse_xml_namespace() 7758 self._match_text_seq(")", ",") 7759 7760 this = self._parse_string() 7761 7762 if self._match_text_seq("PASSING"): 7763 # The BY VALUE keywords are optional and are provided for semantic clarity 7764 self._match_text_seq("BY", "VALUE") 7765 passing = self._parse_csv(self._parse_column) 7766 7767 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 7768 7769 if self._match_text_seq("COLUMNS"): 7770 columns = self._parse_csv(self._parse_field_def) 7771 7772 return self.expression( 7773 exp.XMLTable( 7774 this=this, namespaces=namespaces, passing=passing, columns=columns, by_ref=by_ref 7775 ) 7776 ) 7777 7778 def _parse_xml_namespace(self) -> list[exp.XMLNamespace]: 7779 namespaces = [] 7780 7781 while True: 7782 if self._match(TokenType.DEFAULT): 7783 uri = self._parse_string() 7784 else: 7785 uri = self._parse_alias(self._parse_string()) 7786 namespaces.append(self.expression(exp.XMLNamespace(this=uri))) 7787 if not self._match(TokenType.COMMA): 7788 break 7789 7790 return namespaces 7791 7792 def _parse_decode(self) -> exp.Decode | exp.DecodeCase | None: 7793 args = self._parse_csv(self._parse_disjunction) 7794 7795 if len(args) < 3: 7796 return self.expression(exp.Decode(this=seq_get(args, 0), charset=seq_get(args, 1))) 7797 7798 return self.expression(exp.DecodeCase(expressions=args)) 7799 7800 def _parse_json_key_value(self) -> exp.JSONKeyValue | None: 7801 self._match_text_seq("KEY") 7802 key = self._parse_column() 7803 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 7804 self._match_text_seq("VALUE") 7805 value = self._parse_bitwise() 7806 7807 if not key and not value: 7808 return None 7809 return self.expression(exp.JSONKeyValue(this=key, expression=value)) 7810 7811 def _parse_format_json(self, this: exp.Expr | None) -> exp.Expr | None: 7812 if not this or not self._match_text_seq("FORMAT", "JSON"): 7813 return this 7814 7815 return self.expression(exp.FormatJson(this=this)) 7816 7817 def _parse_on_condition(self) -> exp.OnCondition | None: 7818 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 7819 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 7820 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 7821 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 7822 else: 7823 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 7824 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 7825 7826 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 7827 7828 if not empty and not error and not null: 7829 return None 7830 7831 return self.expression(exp.OnCondition(empty=empty, error=error, null=null)) 7832 7833 def _parse_on_handling(self, on: str, *values: str) -> str | None | exp.Expr | None: 7834 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 7835 for value in values: 7836 if self._match_text_seq(value, "ON", on): 7837 return f"{value} ON {on}" 7838 7839 index = self._index 7840 if self._match(TokenType.DEFAULT): 7841 default_value = self._parse_bitwise() 7842 if self._match_text_seq("ON", on): 7843 return default_value 7844 7845 self._retreat(index) 7846 7847 return None 7848 7849 @t.overload 7850 def _parse_json_object(self, agg: t.Literal[False]) -> exp.JSONObject: ... 7851 7852 @t.overload 7853 def _parse_json_object(self, agg: t.Literal[True]) -> exp.JSONObjectAgg: ... 7854 7855 def _parse_json_object(self, agg=False): 7856 star = self._parse_star() 7857 expressions = ( 7858 [star] 7859 if star 7860 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 7861 ) 7862 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 7863 7864 unique_keys = None 7865 if self._match_text_seq("WITH", "UNIQUE"): 7866 unique_keys = True 7867 elif self._match_text_seq("WITHOUT", "UNIQUE"): 7868 unique_keys = False 7869 7870 self._match_text_seq("KEYS") 7871 7872 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 7873 self._parse_type() 7874 ) 7875 encoding = self._match_text_seq("ENCODING") and self._parse_var() 7876 7877 return self.expression( 7878 (exp.JSONObjectAgg if agg else exp.JSONObject)( 7879 expressions=expressions, 7880 null_handling=null_handling, 7881 unique_keys=unique_keys, 7882 return_type=return_type, 7883 encoding=encoding, 7884 ) 7885 ) 7886 7887 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 7888 def _parse_json_column_def(self) -> exp.JSONColumnDef: 7889 if not self._match_text_seq("NESTED"): 7890 this = self._parse_id_var() 7891 ordinality = self._match_pair(TokenType.FOR, TokenType.ORDINALITY) 7892 kind = self._parse_types(allow_identifiers=False) 7893 nested = None 7894 else: 7895 this = None 7896 ordinality = None 7897 kind = None 7898 nested = True 7899 7900 path = self._match_text_seq("PATH") and self._parse_string() 7901 nested_schema = nested and self._parse_json_schema() 7902 7903 return self.expression( 7904 exp.JSONColumnDef( 7905 this=this, kind=kind, path=path, nested_schema=nested_schema, ordinality=ordinality 7906 ) 7907 ) 7908 7909 def _parse_json_schema(self) -> exp.JSONSchema: 7910 self._match_text_seq("COLUMNS") 7911 return self.expression( 7912 exp.JSONSchema( 7913 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True) 7914 ) 7915 ) 7916 7917 def _parse_json_table(self) -> exp.JSONTable: 7918 this = self._parse_format_json(self._parse_bitwise()) 7919 path = self._match(TokenType.COMMA) and self._parse_string() 7920 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 7921 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 7922 schema = self._parse_json_schema() 7923 7924 return exp.JSONTable( 7925 this=this, 7926 schema=schema, 7927 path=path, 7928 error_handling=error_handling, 7929 empty_handling=empty_handling, 7930 ) 7931 7932 def _parse_match_against(self) -> exp.MatchAgainst: 7933 if self._match_text_seq("TABLE"): 7934 # parse SingleStore MATCH(TABLE ...) syntax 7935 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 7936 expressions = [] 7937 table = self._parse_table() 7938 if table: 7939 expressions = [table] 7940 else: 7941 expressions = self._parse_csv(self._parse_column) 7942 7943 self._match_text_seq(")", "AGAINST", "(") 7944 7945 this = self._parse_string() 7946 7947 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 7948 modifier = "IN NATURAL LANGUAGE MODE" 7949 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 7950 modifier = f"{modifier} WITH QUERY EXPANSION" 7951 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 7952 modifier = "IN BOOLEAN MODE" 7953 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 7954 modifier = "WITH QUERY EXPANSION" 7955 else: 7956 modifier = None 7957 7958 return self.expression( 7959 exp.MatchAgainst(this=this, expressions=expressions, modifier=modifier) 7960 ) 7961 7962 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 7963 def _parse_open_json(self) -> exp.OpenJSON: 7964 this = self._parse_bitwise() 7965 path = self._match(TokenType.COMMA) and self._parse_string() 7966 7967 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 7968 this = self._parse_field(any_token=True) 7969 kind = self._parse_types() 7970 path = self._parse_string() 7971 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 7972 7973 return self.expression( 7974 exp.OpenJSONColumnDef(this=this, kind=kind, path=path, as_json=as_json) 7975 ) 7976 7977 expressions = None 7978 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 7979 self._match_l_paren() 7980 expressions = self._parse_csv(_parse_open_json_column_def) 7981 7982 return self.expression(exp.OpenJSON(this=this, path=path, expressions=expressions)) 7983 7984 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 7985 args = self._parse_csv(self._parse_bitwise) 7986 7987 if self._match(TokenType.IN): 7988 return self.expression( 7989 exp.StrPosition(this=self._parse_bitwise(), substr=seq_get(args, 0)) 7990 ) 7991 7992 if haystack_first: 7993 haystack = seq_get(args, 0) 7994 needle = seq_get(args, 1) 7995 else: 7996 haystack = seq_get(args, 1) 7997 needle = seq_get(args, 0) 7998 7999 return self.expression( 8000 exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2)) 8001 ) 8002 8003 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 8004 args = self._parse_csv(self._parse_table) 8005 return exp.JoinHint(this=func_name.upper(), expressions=args) 8006 8007 def _parse_substring(self) -> exp.Substring: 8008 # Postgres supports the form: substring(string [from int] [for int]) 8009 # (despite being undocumented, the reverse order also works) 8010 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 8011 8012 args = t.cast(list[t.Optional[exp.Expr]], self._parse_csv(self._parse_bitwise)) 8013 8014 start, length = None, None 8015 8016 while self._curr: 8017 if self._match(TokenType.FROM): 8018 start = self._parse_bitwise() 8019 elif self._match(TokenType.FOR): 8020 if not start: 8021 start = exp.Literal.number(1) 8022 length = self._parse_bitwise() 8023 else: 8024 break 8025 8026 if start: 8027 args.append(start) 8028 if length: 8029 args.append(length) 8030 8031 return self.validate_expression(exp.Substring.from_arg_list(args), args) 8032 8033 def _parse_trim(self) -> exp.Trim: 8034 # https://www.w3resource.com/sql/character-functions/trim.php 8035 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 8036 8037 position = None 8038 collation = None 8039 expression = None 8040 8041 if self._match_texts(self.TRIM_TYPES): 8042 position = self._prev.text.upper() 8043 8044 this = self._parse_bitwise() 8045 if self._match_set((TokenType.FROM, TokenType.COMMA)): 8046 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 8047 expression = self._parse_bitwise() 8048 8049 if invert_order: 8050 this, expression = expression, this 8051 8052 if self._match(TokenType.COLLATE): 8053 collation = self._parse_bitwise() 8054 8055 return self.expression( 8056 exp.Trim(this=this, position=position, expression=expression, collation=collation) 8057 ) 8058 8059 def _parse_window_clause(self) -> list[exp.Expr] | None: 8060 return self._parse_csv(self._parse_named_window) if self._match(TokenType.WINDOW) else None 8061 8062 def _parse_named_window(self) -> exp.Expr | None: 8063 return self._parse_window(self._parse_id_var(), alias=True) 8064 8065 def _parse_respect_or_ignore_nulls(self, this: exp.Expr | None) -> exp.Expr | None: 8066 if self._curr.token_type == TokenType.VAR: 8067 if self._match_text_seq("IGNORE", "NULLS"): 8068 return self.expression(exp.IgnoreNulls(this=this)) 8069 if self._match_text_seq("RESPECT", "NULLS"): 8070 return self.expression(exp.RespectNulls(this=this)) 8071 return this 8072 8073 def _parse_having_max(self, this: exp.Expr | None) -> exp.Expr | None: 8074 if self._match(TokenType.HAVING): 8075 self._match_texts(("MAX", "MIN")) 8076 max = self._prev.text.upper() != "MIN" 8077 return self.expression( 8078 exp.HavingMax(this=this, expression=self._parse_column(), max=max) 8079 ) 8080 8081 return this 8082 8083 def _parse_window(self, this: exp.Expr | None, alias: bool = False) -> exp.Expr | None: 8084 func = this 8085 comments = func.comments if isinstance(func, exp.Expr) else None 8086 8087 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 8088 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 8089 if self._match_text_seq("WITHIN", "GROUP"): 8090 order = self._parse_wrapped(self._parse_order) 8091 this = self.expression(exp.WithinGroup(this=this, expression=order)) 8092 8093 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 8094 self._match(TokenType.WHERE) 8095 this = self.expression( 8096 exp.Filter(this=this, expression=self._parse_where(skip_where_token=True)) 8097 ) 8098 self._match_r_paren() 8099 8100 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 8101 # Some dialects choose to implement and some do not. 8102 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 8103 8104 # There is some code above in _parse_lambda that handles 8105 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 8106 8107 # The below changes handle 8108 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 8109 8110 # Oracle allows both formats 8111 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 8112 # and Snowflake chose to do the same for familiarity 8113 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 8114 if isinstance(this, exp.AggFunc): 8115 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 8116 8117 if ignore_respect and ignore_respect is not this: 8118 ignore_respect.replace(ignore_respect.this) 8119 this = self.expression(ignore_respect.__class__(this=this)) 8120 8121 this = self._parse_respect_or_ignore_nulls(this) 8122 8123 # bigquery select from window x AS (partition by ...) 8124 if alias: 8125 over = None 8126 self._match(TokenType.ALIAS) 8127 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 8128 return this 8129 else: 8130 over = self._prev.text.upper() 8131 8132 if comments and isinstance(func, exp.Expr): 8133 func.pop_comments() 8134 8135 if not self._match(TokenType.L_PAREN): 8136 return self.expression( 8137 exp.Window(this=this, alias=self._parse_id_var(False), over=over), comments=comments 8138 ) 8139 8140 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 8141 8142 first: bool | None = True if self._match(TokenType.FIRST) else None 8143 if self._match_text_seq("LAST"): 8144 first = False 8145 8146 partition, order = self._parse_partition_and_order() 8147 kind = ( 8148 self._match_set((TokenType.ROWS, TokenType.RANGE)) or self._match_text_seq("GROUPS") 8149 ) and self._prev.text 8150 8151 if kind: 8152 self._match(TokenType.BETWEEN) 8153 start = self._parse_window_spec() 8154 8155 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 8156 exclude = ( 8157 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 8158 if self._match_text_seq("EXCLUDE") 8159 else None 8160 ) 8161 8162 spec = self.expression( 8163 exp.WindowSpec( 8164 kind=kind, 8165 start=start["value"], 8166 start_side=start["side"], 8167 end=end.get("value"), 8168 end_side=end.get("side"), 8169 exclude=exclude, 8170 ) 8171 ) 8172 else: 8173 spec = None 8174 8175 self._match_r_paren() 8176 8177 window = self.expression( 8178 exp.Window( 8179 this=this, 8180 partition_by=partition, 8181 order=order, 8182 spec=spec, 8183 alias=window_alias, 8184 over=over, 8185 first=first, 8186 ), 8187 comments=comments, 8188 ) 8189 8190 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 8191 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 8192 return self._parse_window(window, alias=alias) 8193 8194 return window 8195 8196 def _parse_partition_and_order( 8197 self, 8198 ) -> tuple[list[exp.Expr], exp.Expr | None]: 8199 return self._parse_partition_by(), self._parse_order() 8200 8201 def _parse_window_spec(self) -> dict[str, str | exp.Expr | None]: 8202 self._match(TokenType.BETWEEN) 8203 8204 return { 8205 "value": ( 8206 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 8207 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 8208 or self._parse_bitwise() 8209 ), 8210 "side": self._prev.text if self._match_texts(self.WINDOW_SIDES) else None, 8211 } 8212 8213 def _parse_alias(self, this: exp.Expr | None, explicit: bool = False) -> exp.Expr | None: 8214 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 8215 # so this section tries to parse the clause version and if it fails, it treats the token 8216 # as an identifier (alias) 8217 if self._can_parse_limit_or_offset(): 8218 return this 8219 8220 any_token = self._match(TokenType.ALIAS) 8221 comments = self._prev_comments 8222 8223 if explicit and not any_token: 8224 return this 8225 8226 if self._match(TokenType.L_PAREN): 8227 aliases = self.expression( 8228 exp.Aliases( 8229 this=this, expressions=self._parse_csv(lambda: self._parse_id_var(any_token)) 8230 ), 8231 comments=comments, 8232 ) 8233 self._match_r_paren(aliases) 8234 return aliases 8235 8236 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 8237 self.STRING_ALIASES and self._parse_string_as_identifier() 8238 ) 8239 8240 if alias: 8241 comments.extend(alias.pop_comments()) 8242 this = self.expression(exp.Alias(this=this, alias=alias), comments=comments) 8243 column = this.this 8244 8245 # Moves the comment next to the alias in `expr /* comment */ AS alias` 8246 if not this.comments and column and column.comments: 8247 this.comments = column.pop_comments() 8248 8249 return this 8250 8251 def _parse_id_var( 8252 self, 8253 any_token: bool = True, 8254 tokens: t.Collection[TokenType] | None = None, 8255 ) -> exp.Expr | None: 8256 expression = self._parse_identifier() 8257 if not expression and ( 8258 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 8259 ): 8260 quoted = self._prev.token_type == TokenType.STRING 8261 expression = self._identifier_expression(quoted=quoted) 8262 8263 return expression 8264 8265 def _parse_string(self) -> exp.Expr | None: 8266 if self._match_set(self.STRING_PARSERS): 8267 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 8268 return self._parse_placeholder() 8269 8270 def _parse_string_as_identifier(self) -> exp.Identifier | None: 8271 if not self._match(TokenType.STRING): 8272 return None 8273 output = exp.to_identifier(self._prev.text, quoted=True) 8274 output.update_positions(self._prev) 8275 return output 8276 8277 def _parse_number(self) -> exp.Expr | None: 8278 if self._match_set(self.NUMERIC_PARSERS): 8279 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 8280 return self._parse_placeholder() 8281 8282 def _parse_identifier(self) -> exp.Expr | None: 8283 if self._match(TokenType.IDENTIFIER): 8284 return self._identifier_expression(quoted=True) 8285 return self._parse_placeholder() 8286 8287 def _parse_var( 8288 self, 8289 any_token: bool = False, 8290 tokens: t.Collection[TokenType] | None = None, 8291 upper: bool = False, 8292 ) -> exp.Expr | None: 8293 if ( 8294 (any_token and self._advance_any()) 8295 or self._match(TokenType.VAR) 8296 or (self._match_set(tokens) if tokens else False) 8297 ): 8298 return self.expression( 8299 exp.Var(this=self._prev.text.upper() if upper else self._prev.text) 8300 ) 8301 return self._parse_placeholder() 8302 8303 def _advance_any(self, ignore_reserved: bool = False) -> Token | None: 8304 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 8305 self._advance() 8306 return self._prev 8307 return None 8308 8309 def _parse_var_or_string(self, upper: bool = False) -> exp.Expr | None: 8310 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 8311 8312 def _parse_primary_or_var(self) -> exp.Expr | None: 8313 return self._parse_primary() or self._parse_var(any_token=True) 8314 8315 def _parse_null(self) -> exp.Expr | None: 8316 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 8317 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 8318 return self._parse_placeholder() 8319 8320 def _parse_boolean(self) -> exp.Expr | None: 8321 if self._match(TokenType.TRUE): 8322 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 8323 if self._match(TokenType.FALSE): 8324 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 8325 return self._parse_placeholder() 8326 8327 def _parse_star(self) -> exp.Expr | None: 8328 if self._match(TokenType.STAR): 8329 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 8330 return self._parse_placeholder() 8331 8332 def _parse_parameter(self) -> exp.Parameter: 8333 this = self._parse_identifier() or self._parse_primary_or_var() 8334 return self.expression(exp.Parameter(this=this)) 8335 8336 def _parse_placeholder(self) -> exp.Expr | None: 8337 if self._match_set(self.PLACEHOLDER_PARSERS): 8338 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 8339 if placeholder: 8340 return placeholder 8341 self._advance(-1) 8342 return None 8343 8344 def _parse_star_op(self, *keywords: str) -> list[exp.Expr] | None: 8345 if not self._match_texts(keywords): 8346 return None 8347 if self._match(TokenType.L_PAREN, advance=False): 8348 return self._parse_wrapped_csv(self._parse_expression) 8349 8350 expression = self._parse_alias(self._parse_disjunction(), explicit=True) 8351 return [expression] if expression else None 8352 8353 def _parse_csv( 8354 self, parse_method: t.Callable[[], T | None], sep: TokenType = TokenType.COMMA 8355 ) -> list[T]: 8356 parse_result = parse_method() 8357 items = [parse_result] if parse_result is not None else [] 8358 8359 while self._match(sep): 8360 if isinstance(parse_result, exp.Expr): 8361 self._add_comments(parse_result) 8362 parse_result = parse_method() 8363 if parse_result is not None: 8364 items.append(parse_result) 8365 8366 return items 8367 8368 def _parse_wrapped_id_vars(self, optional: bool = False) -> list[exp.Expr]: 8369 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 8370 8371 def _parse_wrapped_csv( 8372 self, 8373 parse_method: t.Callable[[], T | None], 8374 sep: TokenType = TokenType.COMMA, 8375 optional: bool = False, 8376 ) -> list[T]: 8377 return self._parse_wrapped( 8378 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 8379 ) 8380 8381 def _parse_wrapped(self, parse_method: t.Callable[[], T], optional: bool = False) -> T: 8382 wrapped = self._match(TokenType.L_PAREN) 8383 if not wrapped and not optional: 8384 self.raise_error("Expecting (") 8385 parse_result = parse_method() 8386 if wrapped: 8387 self._match_r_paren() 8388 return parse_result 8389 8390 def _parse_expressions(self) -> list[exp.Expr]: 8391 return self._parse_csv(self._parse_expression) 8392 8393 def _parse_select_or_expression(self, alias: bool = False) -> exp.Expr | None: 8394 return ( 8395 self._parse_set_operations( 8396 self._parse_alias(self._parse_assignment(), explicit=True) 8397 if alias 8398 else self._parse_assignment() 8399 ) 8400 or self._parse_select() 8401 ) 8402 8403 def _parse_ddl_select(self) -> exp.Expr | None: 8404 return self._parse_query_modifiers( 8405 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 8406 ) 8407 8408 def _parse_transaction(self) -> exp.Transaction | exp.Command: 8409 this = None 8410 if self._match_texts(self.TRANSACTION_KIND): 8411 this = self._prev.text 8412 8413 self._match_texts(("TRANSACTION", "WORK")) 8414 8415 modes = [] 8416 while True: 8417 mode = [] 8418 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 8419 mode.append(self._prev.text) 8420 8421 if mode: 8422 modes.append(" ".join(mode)) 8423 if not self._match(TokenType.COMMA): 8424 break 8425 8426 return self.expression(exp.Transaction(this=this, modes=modes)) 8427 8428 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 8429 chain = None 8430 savepoint = None 8431 is_rollback = self._prev.token_type == TokenType.ROLLBACK 8432 8433 self._match_texts(("TRANSACTION", "WORK")) 8434 8435 if self._match_text_seq("TO"): 8436 self._match_text_seq("SAVEPOINT") 8437 savepoint = self._parse_id_var() 8438 8439 if self._match(TokenType.AND): 8440 chain = not self._match_text_seq("NO") 8441 self._match_text_seq("CHAIN") 8442 8443 if is_rollback: 8444 return self.expression(exp.Rollback(savepoint=savepoint)) 8445 8446 return self.expression(exp.Commit(chain=chain)) 8447 8448 def _parse_refresh(self) -> exp.Refresh | exp.Command: 8449 if self._match(TokenType.TABLE): 8450 kind = "TABLE" 8451 elif self._match_text_seq("MATERIALIZED", "VIEW"): 8452 kind = "MATERIALIZED VIEW" 8453 else: 8454 kind = "" 8455 8456 this = self._parse_string() or self._parse_table() 8457 if not kind and not isinstance(this, exp.Literal): 8458 return self._parse_as_command(self._prev) 8459 8460 return self.expression(exp.Refresh(this=this, kind=kind)) 8461 8462 def _parse_column_def_with_exists(self): 8463 start = self._index 8464 self._match(TokenType.COLUMN) 8465 8466 exists_column = self._parse_exists(not_=True) 8467 expression = self._parse_field_def() 8468 8469 if not isinstance(expression, exp.ColumnDef): 8470 self._retreat(start) 8471 return None 8472 8473 expression.set("exists", exists_column) 8474 8475 return expression 8476 8477 def _parse_add_column(self) -> exp.ColumnDef | None: 8478 if not self._prev.text.upper() == "ADD": 8479 return None 8480 8481 expression = self._parse_column_def_with_exists() 8482 if not expression: 8483 return None 8484 8485 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 8486 if self._match_texts(("FIRST", "AFTER")): 8487 position = self._prev.text 8488 column_position = self.expression( 8489 exp.ColumnPosition(this=self._parse_column(), position=position) 8490 ) 8491 expression.set("position", column_position) 8492 8493 return expression 8494 8495 def _parse_drop_column(self) -> exp.Drop | exp.Command | None: 8496 drop = self._parse_drop() if self._match(TokenType.DROP) else None 8497 if drop and not isinstance(drop, exp.Command): 8498 drop.set("kind", drop.args.get("kind", "COLUMN")) 8499 return drop 8500 8501 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 8502 def _parse_drop_partition(self, exists: bool | None = None) -> exp.DropPartition: 8503 return self.expression( 8504 exp.DropPartition(expressions=self._parse_csv(self._parse_partition), exists=exists) 8505 ) 8506 8507 def _parse_alter_table_add(self) -> list[exp.Expr]: 8508 def _parse_add_alteration() -> exp.Expr | None: 8509 self._match_text_seq("ADD") 8510 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 8511 return self.expression( 8512 exp.AddConstraint(expressions=self._parse_csv(self._parse_constraint)) 8513 ) 8514 8515 column_def = self._parse_add_column() 8516 if isinstance(column_def, exp.ColumnDef): 8517 return column_def 8518 8519 exists = self._parse_exists(not_=True) 8520 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 8521 return self.expression( 8522 exp.AddPartition( 8523 exists=exists, 8524 this=self._parse_field(any_token=True), 8525 location=self._match_text_seq("LOCATION", advance=False) 8526 and self._parse_property(), 8527 ) 8528 ) 8529 8530 return None 8531 8532 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 8533 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 8534 or self._match_text_seq("COLUMNS") 8535 ): 8536 schema = self._parse_schema() 8537 8538 return ( 8539 ensure_list(schema) 8540 if schema 8541 else self._parse_csv(self._parse_column_def_with_exists) 8542 ) 8543 8544 return self._parse_csv(_parse_add_alteration) 8545 8546 def _parse_alter_table_alter(self) -> exp.Expr | None: 8547 if self._match_texts(self.ALTER_ALTER_PARSERS): 8548 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 8549 8550 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 8551 # keyword after ALTER we default to parsing this statement 8552 self._match(TokenType.COLUMN) 8553 column = self._parse_field(any_token=True) 8554 8555 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 8556 return self.expression(exp.AlterColumn(this=column, drop=True)) 8557 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 8558 return self.expression(exp.AlterColumn(this=column, default=self._parse_disjunction())) 8559 if self._match(TokenType.COMMENT): 8560 return self.expression(exp.AlterColumn(this=column, comment=self._parse_string())) 8561 if self._match_text_seq("DROP", "NOT", "NULL"): 8562 return self.expression(exp.AlterColumn(this=column, drop=True, allow_null=True)) 8563 if self._match_text_seq("SET", "NOT", "NULL"): 8564 return self.expression(exp.AlterColumn(this=column, allow_null=False)) 8565 8566 if self._match_text_seq("SET", "VISIBLE"): 8567 return self.expression(exp.AlterColumn(this=column, visible="VISIBLE")) 8568 if self._match_text_seq("SET", "INVISIBLE"): 8569 return self.expression(exp.AlterColumn(this=column, visible="INVISIBLE")) 8570 8571 self._match_text_seq("SET", "DATA") 8572 self._match_text_seq("TYPE") 8573 return self.expression( 8574 exp.AlterColumn( 8575 this=column, 8576 dtype=self._parse_types(), 8577 collate=self._match(TokenType.COLLATE) and self._parse_term(), 8578 using=self._match(TokenType.USING) and self._parse_disjunction(), 8579 ) 8580 ) 8581 8582 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 8583 if self._match_texts(("ALL", "EVEN", "AUTO")): 8584 return self.expression(exp.AlterDistStyle(this=exp.var(self._prev.text.upper()))) 8585 8586 self._match_text_seq("KEY", "DISTKEY") 8587 return self.expression(exp.AlterDistStyle(this=self._parse_column())) 8588 8589 def _parse_alter_sortkey(self, compound: bool | None = None) -> exp.AlterSortKey: 8590 if compound: 8591 self._match_text_seq("SORTKEY") 8592 8593 if self._match(TokenType.L_PAREN, advance=False): 8594 return self.expression( 8595 exp.AlterSortKey(expressions=self._parse_wrapped_id_vars(), compound=compound) 8596 ) 8597 8598 self._match_texts(("AUTO", "NONE")) 8599 return self.expression( 8600 exp.AlterSortKey(this=exp.var(self._prev.text.upper()), compound=compound) 8601 ) 8602 8603 def _parse_alter_table_drop(self) -> list[exp.Expr]: 8604 index = self._index - 1 8605 8606 partition_exists = self._parse_exists() 8607 if self._match(TokenType.PARTITION, advance=False): 8608 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 8609 8610 self._retreat(index) 8611 return self._parse_csv(self._parse_drop_column) 8612 8613 def _parse_alter_table_rename(self) -> exp.AlterRename | exp.RenameColumn | None: 8614 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 8615 exists = self._parse_exists() 8616 old_column = self._parse_column() 8617 to = self._match_text_seq("TO") 8618 new_column = self._parse_column() 8619 8620 if old_column is None or not to or new_column is None: 8621 return None 8622 8623 return self.expression(exp.RenameColumn(this=old_column, to=new_column, exists=exists)) 8624 8625 self._match_text_seq("TO") 8626 return self.expression(exp.AlterRename(this=self._parse_table(schema=True))) 8627 8628 def _parse_alter_table_set(self) -> exp.AlterSet: 8629 alter_set = self.expression(exp.AlterSet()) 8630 8631 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 8632 "TABLE", "PROPERTIES" 8633 ): 8634 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 8635 elif self._match_text_seq("FILESTREAM_ON", advance=False): 8636 alter_set.set("expressions", [self._parse_assignment()]) 8637 elif self._match_texts(("LOGGED", "UNLOGGED")): 8638 alter_set.set("option", exp.var(self._prev.text.upper())) 8639 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 8640 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 8641 elif self._match_text_seq("LOCATION"): 8642 alter_set.set("location", self._parse_field()) 8643 elif self._match_text_seq("ACCESS", "METHOD"): 8644 alter_set.set("access_method", self._parse_field()) 8645 elif self._match_text_seq("TABLESPACE"): 8646 alter_set.set("tablespace", self._parse_field()) 8647 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 8648 alter_set.set("file_format", [self._parse_field()]) 8649 elif self._match_text_seq("STAGE_FILE_FORMAT"): 8650 alter_set.set("file_format", self._parse_wrapped_options()) 8651 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 8652 alter_set.set("copy_options", self._parse_wrapped_options()) 8653 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 8654 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 8655 else: 8656 if self._match_text_seq("SERDE"): 8657 alter_set.set("serde", self._parse_field()) 8658 8659 properties = self._parse_wrapped(self._parse_properties, optional=True) 8660 alter_set.set("expressions", [properties]) 8661 8662 return alter_set 8663 8664 def _parse_alter_session(self) -> exp.AlterSession: 8665 """Parse ALTER SESSION SET/UNSET statements.""" 8666 if self._match(TokenType.SET): 8667 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 8668 return self.expression(exp.AlterSession(expressions=expressions, unset=False)) 8669 8670 self._match_text_seq("UNSET") 8671 expressions = self._parse_csv( 8672 lambda: self.expression(exp.SetItem(this=self._parse_id_var(any_token=True))) 8673 ) 8674 return self.expression(exp.AlterSession(expressions=expressions, unset=True)) 8675 8676 def _parse_alter(self) -> exp.Alter | exp.Command: 8677 start = self._prev 8678 8679 iceberg = self._match_text_seq("ICEBERG") 8680 8681 alter_token = self._match_set(self.ALTERABLES) and self._prev 8682 if not alter_token: 8683 return self._parse_as_command(start) 8684 if iceberg and alter_token.token_type != TokenType.TABLE: 8685 return self._parse_as_command(start) 8686 8687 exists = self._parse_exists() 8688 only = self._match_text_seq("ONLY") 8689 8690 if alter_token.token_type == TokenType.SESSION: 8691 this = None 8692 check = None 8693 cluster = None 8694 else: 8695 this = self._parse_table(schema=True, parse_partition=self.ALTER_TABLE_PARTITIONS) 8696 check = self._match_text_seq("WITH", "CHECK") 8697 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8698 8699 if self._next: 8700 self._advance() 8701 8702 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 8703 if parser: 8704 actions = ensure_list(parser(self)) 8705 not_valid = self._match_text_seq("NOT", "VALID") 8706 options = self._parse_csv(self._parse_property) 8707 cascade = self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE") 8708 8709 if not self._curr and actions: 8710 return self.expression( 8711 exp.Alter( 8712 this=this, 8713 kind=alter_token.text.upper(), 8714 exists=exists, 8715 actions=actions, 8716 only=only, 8717 options=options, 8718 cluster=cluster, 8719 not_valid=not_valid, 8720 check=check, 8721 cascade=cascade, 8722 iceberg=iceberg, 8723 ) 8724 ) 8725 8726 return self._parse_as_command(start) 8727 8728 def _parse_analyze(self) -> exp.Analyze | exp.Command: 8729 start = self._prev 8730 # https://duckdb.org/docs/sql/statements/analyze 8731 if not self._curr: 8732 return self.expression(exp.Analyze()) 8733 8734 options = [] 8735 while self._match_texts(self.ANALYZE_STYLES): 8736 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 8737 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 8738 else: 8739 options.append(self._prev.text.upper()) 8740 8741 this: exp.Expr | None = None 8742 inner_expression: exp.Expr | None = None 8743 8744 kind = self._curr.text.upper() if self._curr else None 8745 8746 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 8747 this = self._parse_table_parts() 8748 elif self._match_text_seq("TABLES"): 8749 if self._match_set((TokenType.FROM, TokenType.IN)): 8750 kind = f"{kind} {self._prev.text.upper()}" 8751 this = self._parse_table(schema=True, is_db_reference=True) 8752 elif self._match_text_seq("DATABASE"): 8753 this = self._parse_table(schema=True, is_db_reference=True) 8754 elif self._match_text_seq("CLUSTER"): 8755 this = self._parse_table() 8756 # Try matching inner expr keywords before fallback to parse table. 8757 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 8758 kind = None 8759 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 8760 else: 8761 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 8762 kind = None 8763 this = self._parse_table_parts() 8764 8765 partition = self._try_parse(self._parse_partition) 8766 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 8767 return self._parse_as_command(start) 8768 8769 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 8770 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 8771 "WITH", "ASYNC", "MODE" 8772 ): 8773 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 8774 else: 8775 mode = None 8776 8777 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 8778 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 8779 8780 properties = self._parse_properties() 8781 return self.expression( 8782 exp.Analyze( 8783 kind=kind, 8784 this=this, 8785 mode=mode, 8786 partition=partition, 8787 properties=properties, 8788 expression=inner_expression, 8789 options=options, 8790 ) 8791 ) 8792 8793 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 8794 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 8795 this = None 8796 kind = self._prev.text.upper() 8797 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 8798 expressions = [] 8799 8800 if not self._match_text_seq("STATISTICS"): 8801 self.raise_error("Expecting token STATISTICS") 8802 8803 if self._match_text_seq("NOSCAN"): 8804 this = "NOSCAN" 8805 elif self._match(TokenType.FOR): 8806 if self._match_text_seq("ALL", "COLUMNS"): 8807 this = "FOR ALL COLUMNS" 8808 if self._match_texts("COLUMNS"): 8809 this = "FOR COLUMNS" 8810 expressions = self._parse_csv(self._parse_column_reference) 8811 elif self._match_text_seq("SAMPLE"): 8812 sample = self._parse_number() 8813 expressions = [ 8814 self.expression( 8815 exp.AnalyzeSample( 8816 sample=sample, 8817 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 8818 ) 8819 ) 8820 ] 8821 8822 return self.expression( 8823 exp.AnalyzeStatistics(kind=kind, option=option, this=this, expressions=expressions) 8824 ) 8825 8826 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 8827 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 8828 kind = None 8829 this = None 8830 expression: exp.Expr | None = None 8831 if self._match_text_seq("REF", "UPDATE"): 8832 kind = "REF" 8833 this = "UPDATE" 8834 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 8835 this = "UPDATE SET DANGLING TO NULL" 8836 elif self._match_text_seq("STRUCTURE"): 8837 kind = "STRUCTURE" 8838 if self._match_text_seq("CASCADE", "FAST"): 8839 this = "CASCADE FAST" 8840 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 8841 ("ONLINE", "OFFLINE") 8842 ): 8843 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 8844 expression = self._parse_into() 8845 8846 return self.expression(exp.AnalyzeValidate(kind=kind, this=this, expression=expression)) 8847 8848 def _parse_analyze_columns(self) -> exp.AnalyzeColumns | None: 8849 this = self._prev.text.upper() 8850 if self._match_text_seq("COLUMNS"): 8851 return self.expression(exp.AnalyzeColumns(this=f"{this} {self._prev.text.upper()}")) 8852 return None 8853 8854 def _parse_analyze_delete(self) -> exp.AnalyzeDelete | None: 8855 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 8856 if self._match_text_seq("STATISTICS"): 8857 return self.expression(exp.AnalyzeDelete(kind=kind)) 8858 return None 8859 8860 def _parse_analyze_list(self) -> exp.AnalyzeListChainedRows | None: 8861 if self._match_text_seq("CHAINED", "ROWS"): 8862 return self.expression(exp.AnalyzeListChainedRows(expression=self._parse_into())) 8863 return None 8864 8865 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 8866 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 8867 this = self._prev.text.upper() 8868 expression: exp.Expr | None = None 8869 expressions = [] 8870 update_options = None 8871 8872 if self._match_text_seq("HISTOGRAM", "ON"): 8873 expressions = self._parse_csv(self._parse_column_reference) 8874 with_expressions = [] 8875 while self._match(TokenType.WITH): 8876 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 8877 if self._match_texts(("SYNC", "ASYNC")): 8878 if self._match_text_seq("MODE", advance=False): 8879 with_expressions.append(f"{self._prev.text.upper()} MODE") 8880 self._advance() 8881 else: 8882 buckets = self._parse_number() 8883 if self._match_text_seq("BUCKETS"): 8884 with_expressions.append(f"{buckets} BUCKETS") 8885 if with_expressions: 8886 expression = self.expression(exp.AnalyzeWith(expressions=with_expressions)) 8887 8888 if self._match_texts(("MANUAL", "AUTO")) and self._match( 8889 TokenType.UPDATE, advance=False 8890 ): 8891 update_options = self._prev.text.upper() 8892 self._advance() 8893 elif self._match_text_seq("USING", "DATA"): 8894 expression = self.expression(exp.UsingData(this=self._parse_string())) 8895 8896 return self.expression( 8897 exp.AnalyzeHistogram( 8898 this=this, 8899 expressions=expressions, 8900 expression=expression, 8901 update_options=update_options, 8902 ) 8903 ) 8904 8905 def _parse_merge(self) -> exp.Merge: 8906 self._match(TokenType.INTO) 8907 target = self._parse_table() 8908 8909 if target and self._match(TokenType.ALIAS, advance=False): 8910 target.set("alias", self._parse_table_alias()) 8911 8912 self._match(TokenType.USING) 8913 using = self._parse_table() 8914 8915 return self.expression( 8916 exp.Merge( 8917 this=target, 8918 using=using, 8919 on=self._match(TokenType.ON) and self._parse_disjunction(), 8920 using_cond=self._match(TokenType.USING) and self._parse_using_identifiers(), 8921 whens=self._parse_when_matched(), 8922 returning=self._parse_returning(), 8923 ) 8924 ) 8925 8926 def _parse_when_matched(self) -> exp.Whens: 8927 whens = [] 8928 8929 while self._match(TokenType.WHEN): 8930 matched = not self._match(TokenType.NOT) 8931 self._match_text_seq("MATCHED") 8932 source = ( 8933 False 8934 if self._match_text_seq("BY", "TARGET") 8935 else self._match_text_seq("BY", "SOURCE") 8936 ) 8937 condition = self._parse_disjunction() if self._match(TokenType.AND) else None 8938 8939 self._match(TokenType.THEN) 8940 8941 if self._match(TokenType.INSERT): 8942 this = self._parse_star() 8943 if this: 8944 then: exp.Expr | None = self.expression(exp.Insert(this=this)) 8945 else: 8946 then = self.expression( 8947 exp.Insert( 8948 this=exp.var("ROW") 8949 if self._match_text_seq("ROW") 8950 else self._parse_value(values=False), 8951 expression=self._match_text_seq("VALUES") and self._parse_value(), 8952 where=self._parse_where(), 8953 ) 8954 ) 8955 elif self._match(TokenType.UPDATE): 8956 expressions = self._parse_star() 8957 if expressions: 8958 then = self.expression(exp.Update(expressions=expressions)) 8959 else: 8960 then = self.expression( 8961 exp.Update( 8962 expressions=self._match(TokenType.SET) 8963 and self._parse_csv(self._parse_equality), 8964 where=self._parse_where(), 8965 ) 8966 ) 8967 elif self._match(TokenType.DELETE): 8968 then = self.expression(exp.Var(this=self._prev.text)) 8969 else: 8970 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 8971 8972 whens.append( 8973 self.expression( 8974 exp.When(matched=matched, source=source, condition=condition, then=then) 8975 ) 8976 ) 8977 return self.expression(exp.Whens(expressions=whens)) 8978 8979 def _parse_show(self) -> exp.Expr | None: 8980 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 8981 if parser: 8982 return parser(self) 8983 return self._parse_as_command(self._prev) 8984 8985 def _parse_set_item_assignment(self, kind: str | None = None) -> exp.Expr | None: 8986 index = self._index 8987 8988 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 8989 return self._parse_set_transaction(global_=kind == "GLOBAL") 8990 8991 left = self._parse_primary() or self._parse_column() 8992 assignment_delimiter = self._match_texts(self.SET_ASSIGNMENT_DELIMITERS) 8993 8994 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 8995 self._retreat(index) 8996 return None 8997 8998 right = self._parse_statement() or self._parse_id_var() 8999 if isinstance(right, (exp.Column, exp.Identifier)): 9000 right = exp.var(right.name) 9001 9002 this = self.expression(exp.EQ(this=left, expression=right)) 9003 return self.expression(exp.SetItem(this=this, kind=kind)) 9004 9005 def _parse_set_transaction(self, global_: bool = False) -> exp.Expr: 9006 self._match_text_seq("TRANSACTION") 9007 characteristics = self._parse_csv( 9008 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 9009 ) 9010 return self.expression( 9011 exp.SetItem(expressions=characteristics, kind="TRANSACTION", global_=global_) 9012 ) 9013 9014 def _parse_set_item(self) -> exp.Expr | None: 9015 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 9016 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 9017 9018 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 9019 index = self._index 9020 set_ = self.expression( 9021 exp.Set(expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag) 9022 ) 9023 9024 if self._curr: 9025 self._retreat(index) 9026 return self._parse_as_command(self._prev) 9027 9028 return set_ 9029 9030 def _parse_var_from_options( 9031 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 9032 ) -> exp.Var | None: 9033 start = self._curr 9034 if not start: 9035 return None 9036 9037 option = start.text.upper() 9038 continuations = options.get(option) 9039 9040 index = self._index 9041 self._advance() 9042 for keywords in continuations or []: 9043 if isinstance(keywords, str): 9044 keywords = (keywords,) 9045 9046 if self._match_text_seq(*keywords): 9047 option = f"{option} {' '.join(keywords)}" 9048 break 9049 else: 9050 if continuations or continuations is None: 9051 if raise_unmatched: 9052 self.raise_error(f"Unknown option {option}") 9053 9054 self._retreat(index) 9055 return None 9056 9057 return exp.var(option) 9058 9059 def _parse_as_command(self, start: Token) -> exp.Command: 9060 while self._curr: 9061 self._advance() 9062 text = self._find_sql(start, self._prev) 9063 size = len(start.text) 9064 self._warn_unsupported() 9065 return exp.Command(this=text[:size], expression=text[size:]) 9066 9067 def _parse_dict_property(self, this: str) -> exp.DictProperty: 9068 settings = [] 9069 9070 self._match_l_paren() 9071 kind = self._parse_id_var() 9072 9073 if self._match(TokenType.L_PAREN): 9074 while True: 9075 key = self._parse_id_var() 9076 value = self._parse_function() or self._parse_primary_or_var() 9077 if not key and value is None: 9078 break 9079 settings.append(self.expression(exp.DictSubProperty(this=key, value=value))) 9080 self._match(TokenType.R_PAREN) 9081 9082 self._match_r_paren() 9083 9084 return self.expression( 9085 exp.DictProperty(this=this, kind=kind.this if kind else None, settings=settings) 9086 ) 9087 9088 def _parse_dict_range(self, this: str) -> exp.DictRange: 9089 self._match_l_paren() 9090 has_min = self._match_text_seq("MIN") 9091 if has_min: 9092 min = self._parse_var() or self._parse_primary() 9093 self._match_text_seq("MAX") 9094 max = self._parse_var() or self._parse_primary() 9095 else: 9096 max = self._parse_var() or self._parse_primary() 9097 min = exp.Literal.number(0) 9098 self._match_r_paren() 9099 return self.expression(exp.DictRange(this=this, min=min, max=max)) 9100 9101 def _parse_comprehension(self, this: exp.Expr | None) -> exp.Comprehension | None: 9102 index = self._index 9103 expression = self._parse_column() 9104 position = self._match(TokenType.COMMA) and self._parse_column() 9105 9106 if not self._match(TokenType.IN): 9107 self._retreat(index - 1) 9108 return None 9109 iterator = self._parse_column() 9110 condition = self._parse_disjunction() if self._match_text_seq("IF") else None 9111 return self.expression( 9112 exp.Comprehension( 9113 this=this, 9114 expression=expression, 9115 position=position, 9116 iterator=iterator, 9117 condition=condition, 9118 ) 9119 ) 9120 9121 def _parse_heredoc(self) -> exp.Heredoc | None: 9122 if self._match(TokenType.HEREDOC_STRING): 9123 return self.expression(exp.Heredoc(this=self._prev.text)) 9124 9125 if not self._match_text_seq("$"): 9126 return None 9127 9128 tags = ["$"] 9129 tag_text = None 9130 9131 if self._is_connected(): 9132 self._advance() 9133 tags.append(self._prev.text.upper()) 9134 else: 9135 self.raise_error("No closing $ found") 9136 9137 if tags[-1] != "$": 9138 if self._is_connected() and self._match_text_seq("$"): 9139 tag_text = tags[-1] 9140 tags.append("$") 9141 else: 9142 self.raise_error("No closing $ found") 9143 9144 heredoc_start = self._curr 9145 9146 while self._curr: 9147 if self._match_text_seq(*tags, advance=False): 9148 this = self._find_sql(heredoc_start, self._prev) 9149 self._advance(len(tags)) 9150 return self.expression(exp.Heredoc(this=this, tag=tag_text)) 9151 9152 self._advance() 9153 9154 self.raise_error(f"No closing {''.join(tags)} found") 9155 return None 9156 9157 def _find_parser(self, parsers: dict[str, t.Callable], trie: dict) -> t.Callable | None: 9158 if not self._curr: 9159 return None 9160 9161 index = self._index 9162 this = [] 9163 while True: 9164 # The current token might be multiple words 9165 curr = self._curr.text.upper() 9166 key = curr.split(" ") 9167 this.append(curr) 9168 9169 self._advance() 9170 result, trie = in_trie(trie, key) 9171 if result == TrieResult.FAILED: 9172 break 9173 9174 if result == TrieResult.EXISTS: 9175 subparser = parsers[" ".join(this)] 9176 return subparser 9177 9178 self._retreat(index) 9179 return None 9180 9181 def _match_l_paren(self, expression: exp.Expr | None = None) -> None: 9182 if not self._match(TokenType.L_PAREN, expression=expression): 9183 self.raise_error("Expecting (") 9184 9185 def _match_r_paren(self, expression: exp.Expr | None = None) -> None: 9186 if not self._match(TokenType.R_PAREN, expression=expression): 9187 self.raise_error("Expecting )") 9188 9189 def _replace_lambda( 9190 self, node: exp.Expr | None, expressions: list[exp.Expr] 9191 ) -> exp.Expr | None: 9192 if not node: 9193 return node 9194 9195 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 9196 9197 for column in node.find_all(exp.Column): 9198 typ = lambda_types.get(column.parts[0].name) 9199 if typ is not None: 9200 dot_or_id = column.to_dot() if column.table else column.this 9201 9202 if typ: 9203 dot_or_id = self.expression(exp.Cast(this=dot_or_id, to=typ)) 9204 9205 parent = column.parent 9206 9207 while isinstance(parent, exp.Dot): 9208 if not isinstance(parent.parent, exp.Dot): 9209 parent.replace(dot_or_id) 9210 break 9211 parent = parent.parent 9212 else: 9213 if column is node: 9214 node = dot_or_id 9215 else: 9216 column.replace(dot_or_id) 9217 return node 9218 9219 def _parse_truncate_table(self) -> exp.TruncateTable | None | exp.Expr: 9220 start = self._prev 9221 9222 # Not to be confused with TRUNCATE(number, decimals) function call 9223 if self._match(TokenType.L_PAREN): 9224 self._retreat(self._index - 2) 9225 return self._parse_function() 9226 9227 # Clickhouse supports TRUNCATE DATABASE as well 9228 is_database = self._match(TokenType.DATABASE) 9229 9230 self._match(TokenType.TABLE) 9231 9232 exists = self._parse_exists(not_=False) 9233 9234 expressions = self._parse_csv( 9235 lambda: self._parse_table(schema=True, is_db_reference=is_database) 9236 ) 9237 9238 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 9239 9240 if self._match_text_seq("RESTART", "IDENTITY"): 9241 identity = "RESTART" 9242 elif self._match_text_seq("CONTINUE", "IDENTITY"): 9243 identity = "CONTINUE" 9244 else: 9245 identity = None 9246 9247 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 9248 option = self._prev.text 9249 else: 9250 option = None 9251 9252 partition = self._parse_partition() 9253 9254 # Fallback case 9255 if self._curr: 9256 return self._parse_as_command(start) 9257 9258 return self.expression( 9259 exp.TruncateTable( 9260 expressions=expressions, 9261 is_database=is_database, 9262 exists=exists, 9263 cluster=cluster, 9264 identity=identity, 9265 option=option, 9266 partition=partition, 9267 ) 9268 ) 9269 9270 def _parse_with_operator(self) -> exp.Expr | None: 9271 this = self._parse_ordered(self._parse_opclass) 9272 9273 if not self._match(TokenType.WITH): 9274 return this 9275 9276 op = self._parse_var(any_token=True, tokens=self.RESERVED_TOKENS) 9277 9278 return self.expression(exp.WithOperator(this=this, op=op)) 9279 9280 def _parse_wrapped_options(self) -> list[exp.Expr]: 9281 self._match(TokenType.EQ) 9282 self._match(TokenType.L_PAREN) 9283 9284 opts: list[exp.Expr] = [] 9285 option: exp.Expr | list[exp.Expr] | None 9286 while self._curr and not self._match(TokenType.R_PAREN): 9287 if self._match_text_seq("FORMAT_NAME", "="): 9288 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 9289 option = self._parse_format_name() 9290 else: 9291 option = self._parse_property() 9292 9293 if option is None: 9294 self.raise_error("Unable to parse option") 9295 break 9296 9297 opts.extend(ensure_list(option)) 9298 9299 return opts 9300 9301 def _parse_copy_parameters(self) -> list[exp.CopyParameter]: 9302 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 9303 9304 options = [] 9305 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 9306 option = self._parse_var(any_token=True) 9307 prev = self._prev.text.upper() 9308 9309 # Different dialects might separate options and values by white space, "=" and "AS" 9310 self._match(TokenType.EQ) 9311 self._match(TokenType.ALIAS) 9312 9313 param = self.expression(exp.CopyParameter(this=option)) 9314 9315 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 9316 TokenType.L_PAREN, advance=False 9317 ): 9318 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 9319 param.set("expressions", self._parse_wrapped_options()) 9320 elif prev == "FILE_FORMAT": 9321 # T-SQL's external file format case 9322 param.set("expression", self._parse_field()) 9323 elif ( 9324 prev == "FORMAT" 9325 and self._prev.token_type == TokenType.ALIAS 9326 and self._match_texts(("AVRO", "JSON")) 9327 ): 9328 param.set("this", exp.var(f"FORMAT AS {self._prev.text.upper()}")) 9329 param.set("expression", self._parse_field()) 9330 else: 9331 param.set("expression", self._parse_unquoted_field() or self._parse_bracket()) 9332 9333 options.append(param) 9334 9335 if sep: 9336 self._match(sep) 9337 9338 return options 9339 9340 def _parse_credentials(self) -> exp.Credentials | None: 9341 expr = self.expression(exp.Credentials()) 9342 9343 if self._match_text_seq("STORAGE_INTEGRATION", "="): 9344 expr.set("storage", self._parse_field()) 9345 if self._match_text_seq("CREDENTIALS"): 9346 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 9347 creds = ( 9348 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 9349 ) 9350 expr.set("credentials", creds) 9351 if self._match_text_seq("ENCRYPTION"): 9352 expr.set("encryption", self._parse_wrapped_options()) 9353 if self._match_text_seq("IAM_ROLE"): 9354 expr.set( 9355 "iam_role", 9356 exp.var(self._prev.text) if self._match(TokenType.DEFAULT) else self._parse_field(), 9357 ) 9358 if self._match_text_seq("REGION"): 9359 expr.set("region", self._parse_field()) 9360 9361 return expr 9362 9363 def _parse_file_location(self) -> exp.Expr | None: 9364 return self._parse_field() 9365 9366 def _parse_copy(self) -> exp.Copy | exp.Command: 9367 start = self._prev 9368 9369 self._match(TokenType.INTO) 9370 9371 this = ( 9372 self._parse_select(nested=True, parse_subquery_alias=False) 9373 if self._match(TokenType.L_PAREN, advance=False) 9374 else self._parse_table(schema=True) 9375 ) 9376 9377 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 9378 9379 files = self._parse_csv(self._parse_file_location) 9380 if self._match(TokenType.EQ, advance=False): 9381 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 9382 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 9383 # list via `_parse_wrapped(..)` below. 9384 self._advance(-1) 9385 files = [] 9386 9387 credentials = self._parse_credentials() 9388 9389 self._match_text_seq("WITH") 9390 9391 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 9392 9393 # Fallback case 9394 if self._curr: 9395 return self._parse_as_command(start) 9396 9397 return self.expression( 9398 exp.Copy(this=this, kind=kind, credentials=credentials, files=files, params=params) 9399 ) 9400 9401 def _parse_normalize(self) -> exp.Normalize: 9402 return self.expression( 9403 exp.Normalize( 9404 this=self._parse_bitwise(), form=self._match(TokenType.COMMA) and self._parse_var() 9405 ) 9406 ) 9407 9408 def _parse_ceil_floor(self, expr_type: type[TCeilFloor]) -> TCeilFloor: 9409 args = self._parse_csv(lambda: self._parse_lambda()) 9410 9411 this = seq_get(args, 0) 9412 decimals = seq_get(args, 1) 9413 9414 return expr_type( 9415 this=this, 9416 decimals=decimals, 9417 to=self._parse_var() if self._match_text_seq("TO") else None, 9418 ) 9419 9420 def _parse_star_ops(self) -> exp.Expr | None: 9421 star_token = self._prev 9422 9423 if self._match_text_seq("COLUMNS", "(", advance=False): 9424 this = self._parse_function() 9425 if isinstance(this, exp.Columns): 9426 this.set("unpack", True) 9427 return this 9428 9429 return self.expression( 9430 exp.Star( 9431 except_=self._parse_star_op("EXCEPT", "EXCLUDE"), 9432 replace=self._parse_star_op("REPLACE"), 9433 rename=self._parse_star_op("RENAME"), 9434 ) 9435 ).update_positions(star_token) 9436 9437 def _parse_grant_privilege(self) -> exp.GrantPrivilege | None: 9438 privilege_parts = [] 9439 9440 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 9441 # (end of privilege list) or L_PAREN (start of column list) are met 9442 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 9443 privilege_parts.append(self._curr.text.upper()) 9444 self._advance() 9445 9446 this = exp.var(" ".join(privilege_parts)) 9447 expressions = ( 9448 self._parse_wrapped_csv(self._parse_column) 9449 if self._match(TokenType.L_PAREN, advance=False) 9450 else None 9451 ) 9452 9453 return self.expression(exp.GrantPrivilege(this=this, expressions=expressions)) 9454 9455 def _parse_grant_principal(self) -> exp.GrantPrincipal | None: 9456 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 9457 principal = self._parse_id_var() 9458 9459 if not principal: 9460 return None 9461 9462 return self.expression(exp.GrantPrincipal(this=principal, kind=kind)) 9463 9464 def _parse_grant_revoke_common( 9465 self, 9466 ) -> tuple[list | None, str | None, exp.Expr | None]: 9467 privileges = self._parse_csv(self._parse_grant_privilege) 9468 9469 self._match(TokenType.ON) 9470 kind = self._prev.text.upper() if self._match_set(self.CREATABLES) else None 9471 9472 # Attempt to parse the securable e.g. MySQL allows names 9473 # such as "foo.*", "*.*" which are not easily parseable yet 9474 securable = self._try_parse(self._parse_table_parts) 9475 9476 return privileges, kind, securable 9477 9478 def _parse_grant(self) -> exp.Grant | exp.Command: 9479 start = self._prev 9480 9481 privileges, kind, securable = self._parse_grant_revoke_common() 9482 9483 if not securable or not self._match_text_seq("TO"): 9484 return self._parse_as_command(start) 9485 9486 principals = self._parse_csv(self._parse_grant_principal) 9487 9488 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 9489 9490 if self._curr: 9491 return self._parse_as_command(start) 9492 9493 return self.expression( 9494 exp.Grant( 9495 privileges=privileges, 9496 kind=kind, 9497 securable=securable, 9498 principals=principals, 9499 grant_option=grant_option, 9500 ) 9501 ) 9502 9503 def _parse_revoke(self) -> exp.Revoke | exp.Command: 9504 start = self._prev 9505 9506 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 9507 9508 privileges, kind, securable = self._parse_grant_revoke_common() 9509 9510 if not securable or not self._match_text_seq("FROM"): 9511 return self._parse_as_command(start) 9512 9513 principals = self._parse_csv(self._parse_grant_principal) 9514 9515 cascade = None 9516 if self._match_texts(("CASCADE", "RESTRICT")): 9517 cascade = self._prev.text.upper() 9518 9519 if self._curr: 9520 return self._parse_as_command(start) 9521 9522 return self.expression( 9523 exp.Revoke( 9524 privileges=privileges, 9525 kind=kind, 9526 securable=securable, 9527 principals=principals, 9528 grant_option=grant_option, 9529 cascade=cascade, 9530 ) 9531 ) 9532 9533 def _parse_overlay(self) -> exp.Overlay: 9534 def _parse_overlay_arg(text: str) -> exp.Expr | None: 9535 return ( 9536 self._parse_bitwise() 9537 if self._match(TokenType.COMMA) or self._match_text_seq(text) 9538 else None 9539 ) 9540 9541 return self.expression( 9542 exp.Overlay( 9543 this=self._parse_bitwise(), 9544 expression=_parse_overlay_arg("PLACING"), 9545 from_=_parse_overlay_arg("FROM"), 9546 for_=_parse_overlay_arg("FOR"), 9547 ) 9548 ) 9549 9550 def _parse_format_name(self) -> exp.Property: 9551 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 9552 # for FILE_FORMAT = <format_name> 9553 return self.expression( 9554 exp.Property( 9555 this=exp.var("FORMAT_NAME"), value=self._parse_string() or self._parse_table_parts() 9556 ) 9557 ) 9558 9559 def _parse_max_min_by(self, expr_type: type[exp.AggFunc]) -> exp.AggFunc: 9560 args: list[exp.Expr] = [] 9561 9562 if self._match(TokenType.DISTINCT): 9563 args.append(self.expression(exp.Distinct(expressions=[self._parse_lambda()]))) 9564 self._match(TokenType.COMMA) 9565 9566 args.extend(self._parse_function_args()) 9567 9568 return self.expression( 9569 expr_type(this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)) 9570 ) 9571 9572 def _identifier_expression( 9573 self, token: Token | None = None, quoted: bool | None = None 9574 ) -> exp.Identifier: 9575 token = token or self._prev 9576 return self.expression(exp.Identifier(this=token.text, quoted=quoted), token) 9577 9578 def _build_pipe_cte( 9579 self, 9580 query: exp.Query, 9581 expressions: list[exp.Expr], 9582 alias_cte: exp.TableAlias | None = None, 9583 ) -> exp.Select: 9584 new_cte: str | exp.TableAlias | None 9585 if alias_cte: 9586 new_cte = alias_cte 9587 else: 9588 self._pipe_cte_counter += 1 9589 new_cte = f"__tmp{self._pipe_cte_counter}" 9590 9591 with_ = query.args.get("with_") 9592 ctes = with_.pop() if with_ else None 9593 9594 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 9595 if ctes: 9596 new_select.set("with_", ctes) 9597 9598 return new_select.with_(new_cte, as_=query, copy=False) 9599 9600 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 9601 select = self._parse_select(consume_pipe=False) 9602 if not select: 9603 return query 9604 9605 return self._build_pipe_cte( 9606 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 9607 ) 9608 9609 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 9610 limit = self._parse_limit() 9611 offset = self._parse_offset() 9612 if limit: 9613 curr_limit = query.args.get("limit", limit) 9614 if curr_limit.expression.to_py() >= limit.expression.to_py(): 9615 query.limit(limit, copy=False) 9616 if offset: 9617 curr_offset = query.args.get("offset") 9618 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 9619 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 9620 9621 return query 9622 9623 def _parse_pipe_syntax_aggregate_fields(self) -> exp.Expr | None: 9624 this = self._parse_disjunction() 9625 if self._match_text_seq("GROUP", "AND", advance=False): 9626 return this 9627 9628 this = self._parse_alias(this) 9629 9630 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 9631 return self._parse_ordered(lambda: this) 9632 9633 return this 9634 9635 def _parse_pipe_syntax_aggregate_group_order_by( 9636 self, query: exp.Select, group_by_exists: bool = True 9637 ) -> exp.Select: 9638 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 9639 aggregates_or_groups, orders = [], [] 9640 for element in expr: 9641 if isinstance(element, exp.Ordered): 9642 this = element.this 9643 if isinstance(this, exp.Alias): 9644 element.set("this", this.args["alias"]) 9645 orders.append(element) 9646 else: 9647 this = element 9648 aggregates_or_groups.append(this) 9649 9650 if group_by_exists: 9651 query.select(*aggregates_or_groups, copy=False).group_by( 9652 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 9653 copy=False, 9654 ) 9655 else: 9656 query.select(*aggregates_or_groups, append=False, copy=False) 9657 9658 if orders: 9659 return query.order_by(*orders, append=False, copy=False) 9660 9661 return query 9662 9663 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 9664 self._match_text_seq("AGGREGATE") 9665 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 9666 9667 if self._match(TokenType.GROUP_BY) or ( 9668 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 9669 ): 9670 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 9671 9672 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9673 9674 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> exp.Query | None: 9675 first_setop = self.parse_set_operation(this=query) 9676 if not first_setop: 9677 return None 9678 9679 def _parse_and_unwrap_query() -> exp.Expr | None: 9680 expr = self._parse_paren() 9681 return expr.assert_is(exp.Subquery).unnest() if expr else None 9682 9683 first_setop.this.pop() 9684 9685 setops = [ 9686 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 9687 *self._parse_csv(_parse_and_unwrap_query), 9688 ] 9689 9690 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9691 with_ = query.args.get("with_") 9692 ctes = with_.pop() if with_ else None 9693 9694 if isinstance(first_setop, exp.Union): 9695 query = query.union(*setops, copy=False, **first_setop.args) 9696 elif isinstance(first_setop, exp.Except): 9697 query = query.except_(*setops, copy=False, **first_setop.args) 9698 else: 9699 query = query.intersect(*setops, copy=False, **first_setop.args) 9700 9701 query.set("with_", ctes) 9702 9703 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9704 9705 def _parse_pipe_syntax_join(self, query: exp.Query) -> exp.Query | None: 9706 join = self._parse_join() 9707 if not join: 9708 return None 9709 9710 if isinstance(query, exp.Select): 9711 return query.join(join, copy=False) 9712 9713 return query 9714 9715 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 9716 pivots = self._parse_pivots() 9717 if not pivots: 9718 return query 9719 9720 from_ = query.args.get("from_") 9721 if from_: 9722 from_.this.set("pivots", pivots) 9723 9724 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9725 9726 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 9727 self._match_text_seq("EXTEND") 9728 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 9729 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9730 9731 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 9732 sample = self._parse_table_sample() 9733 9734 with_ = query.args.get("with_") 9735 if with_: 9736 with_.expressions[-1].this.set("sample", sample) 9737 else: 9738 query.set("sample", sample) 9739 9740 return query 9741 9742 def _parse_pipe_syntax_query(self, query: exp.Query) -> exp.Query | None: 9743 if isinstance(query, exp.Subquery): 9744 query = exp.select("*").from_(query, copy=False) 9745 9746 if not query.args.get("from_"): 9747 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 9748 9749 while self._match(TokenType.PIPE_GT): 9750 start_index = self._index 9751 start_text = self._curr.text.upper() 9752 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(start_text) 9753 if not parser: 9754 # The set operators (UNION, etc) and the JOIN operator have a few common starting 9755 # keywords, making it tricky to disambiguate them without lookahead. The approach 9756 # here is to try and parse a set operation and if that fails, then try to parse a 9757 # join operator. If that fails as well, then the operator is not supported. 9758 parsed_query = self._parse_pipe_syntax_set_operator(query) 9759 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 9760 if not parsed_query: 9761 self._retreat(start_index) 9762 self.raise_error(f"Unsupported pipe syntax operator: '{start_text}'.") 9763 break 9764 query = parsed_query 9765 else: 9766 query = parser(self, query) 9767 9768 return query 9769 9770 def _parse_declareitem(self) -> exp.DeclareItem | None: 9771 self._match_texts(("VAR", "VARIABLE")) 9772 9773 vars = self._parse_csv(self._parse_id_var) 9774 if not vars: 9775 return None 9776 9777 self._match(TokenType.ALIAS) 9778 kind = self._parse_schema() if self._match(TokenType.TABLE) else self._parse_types() 9779 default = ( 9780 self._match(TokenType.DEFAULT) or self._match(TokenType.EQ) 9781 ) and self._parse_bitwise() 9782 9783 return self.expression(exp.DeclareItem(this=vars, kind=kind, default=default)) 9784 9785 def _parse_declare(self) -> exp.Declare | exp.Command: 9786 start = self._prev 9787 replace = self._match_text_seq("OR", "REPLACE") 9788 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 9789 9790 if not expressions or self._curr: 9791 return self._parse_as_command(start) 9792 9793 return self.expression(exp.Declare(expressions=expressions, replace=replace)) 9794 9795 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 9796 exp_class = exp.Cast if strict else exp.TryCast 9797 9798 if exp_class == exp.TryCast: 9799 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 9800 9801 return self.expression(exp_class(**kwargs)) 9802 9803 def _parse_json_value(self) -> exp.JSONValue: 9804 this = self._parse_bitwise() 9805 self._match(TokenType.COMMA) 9806 path = self._parse_bitwise() 9807 9808 returning = self._match(TokenType.RETURNING) and self._parse_type() 9809 9810 return self.expression( 9811 exp.JSONValue( 9812 this=this, 9813 path=self.dialect.to_json_path(path), 9814 returning=returning, 9815 on_condition=self._parse_on_condition(), 9816 ) 9817 ) 9818 9819 def _parse_group_concat(self) -> exp.Expr | None: 9820 def concat_exprs(node: exp.Expr | None, exprs: list[exp.Expr]) -> exp.Expr: 9821 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 9822 concat_exprs = [ 9823 self.expression( 9824 exp.Concat( 9825 expressions=node.expressions, 9826 safe=True, 9827 coalesce=self.dialect.CONCAT_COALESCE, 9828 ) 9829 ) 9830 ] 9831 node.set("expressions", concat_exprs) 9832 return node 9833 if len(exprs) == 1: 9834 return exprs[0] 9835 return self.expression( 9836 exp.Concat(expressions=args, safe=True, coalesce=self.dialect.CONCAT_COALESCE) 9837 ) 9838 9839 args = self._parse_csv(self._parse_lambda) 9840 9841 if args: 9842 order = args[-1] if isinstance(args[-1], exp.Order) else None 9843 9844 if order: 9845 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 9846 # remove 'expr' from exp.Order and add it back to args 9847 args[-1] = order.this 9848 order.set("this", concat_exprs(order.this, args)) 9849 9850 this = order or concat_exprs(args[0], args) 9851 else: 9852 this = None 9853 9854 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 9855 9856 return self.expression(exp.GroupConcat(this=this, separator=separator)) 9857 9858 def _parse_initcap(self) -> exp.Initcap: 9859 expr = exp.Initcap.from_arg_list(self._parse_function_args()) 9860 9861 # attach dialect's default delimiters 9862 if expr.args.get("expression") is None: 9863 expr.set("expression", exp.Literal.string(self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS)) 9864 9865 return expr 9866 9867 def _parse_operator(self, this: exp.Expr | None) -> exp.Expr | None: 9868 while True: 9869 if not self._match(TokenType.L_PAREN): 9870 break 9871 9872 op = "" 9873 while self._curr and not self._match(TokenType.R_PAREN): 9874 op += self._curr.text 9875 self._advance() 9876 9877 comments = self._prev_comments 9878 this = self.expression( 9879 exp.Operator(this=this, operator=op, expression=self._parse_bitwise()), 9880 comments=comments, 9881 ) 9882 9883 if not self._match(TokenType.OPERATOR): 9884 break 9885 9886 return this
46def build_var_map(args: BuilderArgs) -> exp.StarMap | exp.VarMap: 47 if len(args) == 1 and args[0].is_star: 48 return exp.StarMap(this=args[0]) 49 50 keys: list[ExpOrStr] = [] 51 values: list[ExpOrStr] = [] 52 for i in range(0, len(args), 2): 53 keys.append(args[i]) 54 values.append(args[i + 1]) 55 56 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
64def binary_range_parser( 65 expr_type: Type[exp.Expr], reverse_args: bool = False 66) -> t.Callable[[Parser, exp.Expr | None], exp.Expr | None]: 67 def _parse_binary_range(self: Parser, this: exp.Expr | None) -> exp.Expr | None: 68 expression = self._parse_bitwise() 69 if reverse_args: 70 this, expression = expression, this 71 return self._parse_escape(self.expression(expr_type(this=this, expression=expression))) 72 73 return _parse_binary_range
76def build_logarithm(args: BuilderArgs, dialect: Dialect) -> exp.Func: 77 # Default argument order is base, expression 78 this = seq_get(args, 0) 79 expression = seq_get(args, 1) 80 81 if expression: 82 if not dialect.LOG_BASE_FIRST: 83 this, expression = expression, this 84 return exp.Log(this=this, expression=expression) 85 86 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
106def build_extract_json_with_path( 107 expr_type: Type[E], 108) -> t.Callable[[BuilderArgs, Dialect], E]: 109 def _builder(args: BuilderArgs, dialect: Dialect) -> E: 110 expression = expr_type( 111 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 112 ) 113 if len(args) > 2 and expr_type is exp.JSONExtract: 114 expression.set("expressions", args[2:]) 115 if expr_type is exp.JSONExtractScalar: 116 expression.set("scalar_only", dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY) 117 118 return expression 119 120 return _builder
123def build_mod(args: BuilderArgs) -> exp.Mod: 124 this = seq_get(args, 0) 125 expression = seq_get(args, 1) 126 127 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 128 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 129 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 130 131 return exp.Mod(this=this, expression=expression)
143def build_array_constructor( 144 exp_class: Type[E], args: list[t.Any], bracket_kind: TokenType, dialect: Dialect 145) -> exp.Expr: 146 array_exp = exp_class(expressions=args) 147 148 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 149 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 150 151 return array_exp
154def build_convert_timezone( 155 args: BuilderArgs, default_source_tz: str | None = None 156) -> exp.ConvertTimezone | exp.Anonymous: 157 if len(args) == 2: 158 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 159 return exp.ConvertTimezone( 160 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 161 ) 162 163 return exp.ConvertTimezone.from_arg_list(args)
166def build_trim(args: BuilderArgs, is_left: bool = True, reverse_args: bool = False) -> exp.Trim: 167 this, expression = seq_get(args, 0), seq_get(args, 1) 168 169 if expression and reverse_args: 170 this, expression = expression, this 171 172 return exp.Trim(this=this, expression=expression, position="LEADING" if is_left else "TRAILING")
189def build_array_append(args: BuilderArgs, dialect: Dialect) -> exp.ArrayAppend: 190 """ 191 Builds ArrayAppend with NULL propagation semantics based on the dialect configuration. 192 193 Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL. 194 Others (DuckDB, PostgreSQL) create a new single-element array instead. 195 196 Args: 197 args: Function arguments [array, element] 198 dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from 199 200 Returns: 201 ArrayAppend expression with appropriate null_propagation flag 202 """ 203 return exp.ArrayAppend( 204 this=seq_get(args, 0), 205 expression=seq_get(args, 1), 206 null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS, 207 )
Builds ArrayAppend with NULL propagation semantics based on the dialect configuration.
Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL. Others (DuckDB, PostgreSQL) create a new single-element array instead.
Arguments:
- args: Function arguments [array, element]
- dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from
Returns:
ArrayAppend expression with appropriate null_propagation flag
210def build_array_prepend(args: BuilderArgs, dialect: Dialect) -> exp.ArrayPrepend: 211 """ 212 Builds ArrayPrepend with NULL propagation semantics based on the dialect configuration. 213 214 Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL. 215 Others (DuckDB, PostgreSQL) create a new single-element array instead. 216 217 Args: 218 args: Function arguments [array, element] 219 dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from 220 221 Returns: 222 ArrayPrepend expression with appropriate null_propagation flag 223 """ 224 return exp.ArrayPrepend( 225 this=seq_get(args, 0), 226 expression=seq_get(args, 1), 227 null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS, 228 )
Builds ArrayPrepend with NULL propagation semantics based on the dialect configuration.
Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL. Others (DuckDB, PostgreSQL) create a new single-element array instead.
Arguments:
- args: Function arguments [array, element]
- dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from
Returns:
ArrayPrepend expression with appropriate null_propagation flag
231def build_array_concat(args: BuilderArgs, dialect: Dialect) -> exp.ArrayConcat: 232 """ 233 Builds ArrayConcat with NULL propagation semantics based on the dialect configuration. 234 235 Some dialects (Redshift, Snowflake) return NULL when any input array is NULL. 236 Others (DuckDB, PostgreSQL) skip NULL arrays and continue concatenation. 237 238 Args: 239 args: Function arguments [array1, array2, ...] (variadic) 240 dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from 241 242 Returns: 243 ArrayConcat expression with appropriate null_propagation flag 244 """ 245 return exp.ArrayConcat( 246 this=seq_get(args, 0), 247 expressions=args[1:], 248 null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS, 249 )
Builds ArrayConcat with NULL propagation semantics based on the dialect configuration.
Some dialects (Redshift, Snowflake) return NULL when any input array is NULL. Others (DuckDB, PostgreSQL) skip NULL arrays and continue concatenation.
Arguments:
- args: Function arguments [array1, array2, ...] (variadic)
- dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from
Returns:
ArrayConcat expression with appropriate null_propagation flag
252def build_array_remove(args: BuilderArgs, dialect: Dialect) -> exp.ArrayRemove: 253 """ 254 Builds ArrayRemove with NULL propagation semantics based on the dialect configuration. 255 256 Some dialects (Snowflake) return NULL when the removal value is NULL. 257 Others (DuckDB) may return empty array due to NULL comparison semantics. 258 259 Args: 260 args: Function arguments [array, value_to_remove] 261 dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from 262 263 Returns: 264 ArrayRemove expression with appropriate null_propagation flag 265 """ 266 return exp.ArrayRemove( 267 this=seq_get(args, 0), 268 expression=seq_get(args, 1), 269 null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS, 270 )
Builds ArrayRemove with NULL propagation semantics based on the dialect configuration.
Some dialects (Snowflake) return NULL when the removal value is NULL. Others (DuckDB) may return empty array due to NULL comparison semantics.
Arguments:
- args: Function arguments [array, value_to_remove]
- dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from
Returns:
ArrayRemove expression with appropriate null_propagation flag
282class Parser: 283 """ 284 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 285 286 Args: 287 error_level: The desired error level. 288 Default: ErrorLevel.IMMEDIATE 289 error_message_context: The amount of context to capture from a query string when displaying 290 the error message (in number of characters). 291 Default: 100 292 max_errors: Maximum number of error messages to include in a raised ParseError. 293 This is only relevant if error_level is ErrorLevel.RAISE. 294 Default: 3 295 max_nodes: Maximum number of AST nodes to prevent memory exhaustion. 296 Set to -1 (default) to disable the check. 297 """ 298 299 __slots__ = ( 300 "error_level", 301 "error_message_context", 302 "max_errors", 303 "max_nodes", 304 "dialect", 305 "sql", 306 "errors", 307 "_tokens", 308 "_index", 309 "_curr", 310 "_next", 311 "_prev", 312 "_prev_comments", 313 "_pipe_cte_counter", 314 "_chunks", 315 "_chunk_index", 316 "_tokens_size", 317 "_node_count", 318 ) 319 320 FUNCTIONS: t.ClassVar[dict[str, t.Callable]] = { 321 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 322 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 323 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 324 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 325 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 326 ), 327 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 328 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 329 ), 330 "ARRAY_APPEND": build_array_append, 331 "ARRAY_CAT": build_array_concat, 332 "ARRAY_CONCAT": build_array_concat, 333 "ARRAY_INTERSECT": lambda args: exp.ArrayIntersect(expressions=args), 334 "ARRAY_INTERSECTION": lambda args: exp.ArrayIntersect(expressions=args), 335 "ARRAY_PREPEND": build_array_prepend, 336 "ARRAY_REMOVE": build_array_remove, 337 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 338 "CONCAT": lambda args, dialect: exp.Concat( 339 expressions=args, 340 safe=not dialect.STRICT_STRING_CONCAT, 341 coalesce=dialect.CONCAT_COALESCE, 342 ), 343 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 344 expressions=args, 345 safe=not dialect.STRICT_STRING_CONCAT, 346 coalesce=dialect.CONCAT_COALESCE, 347 ), 348 "CONVERT_TIMEZONE": build_convert_timezone, 349 "DATE_TO_DATE_STR": lambda args: exp.Cast( 350 this=seq_get(args, 0), 351 to=exp.DataType(this=exp.DType.TEXT), 352 ), 353 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 354 start=seq_get(args, 0), 355 end=seq_get(args, 1), 356 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 357 ), 358 "GENERATE_UUID": lambda args, dialect: exp.Uuid( 359 is_string=dialect.UUID_IS_STRING_TYPE or None 360 ), 361 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 362 "GREATEST": lambda args, dialect: exp.Greatest( 363 this=seq_get(args, 0), 364 expressions=args[1:], 365 ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS, 366 ), 367 "LEAST": lambda args, dialect: exp.Least( 368 this=seq_get(args, 0), 369 expressions=args[1:], 370 ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS, 371 ), 372 "HEX": build_hex, 373 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 374 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 375 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 376 "JSON_KEYS": lambda args, dialect: exp.JSONKeys( 377 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 378 ), 379 "LIKE": build_like, 380 "LOG": build_logarithm, 381 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 382 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 383 "LOWER": build_lower, 384 "LPAD": lambda args: build_pad(args), 385 "LEFTPAD": lambda args: build_pad(args), 386 "LTRIM": lambda args: build_trim(args), 387 "MOD": build_mod, 388 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 389 "RPAD": lambda args: build_pad(args, is_left=False), 390 "RTRIM": lambda args: build_trim(args, is_left=False), 391 "SCOPE_RESOLUTION": lambda args: ( 392 exp.ScopeResolution(expression=seq_get(args, 0)) 393 if len(args) != 2 394 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)) 395 ), 396 "STRPOS": exp.StrPosition.from_arg_list, 397 "CHARINDEX": lambda args: build_locate_strposition(args), 398 "INSTR": exp.StrPosition.from_arg_list, 399 "LOCATE": lambda args: build_locate_strposition(args), 400 "TIME_TO_TIME_STR": lambda args: exp.Cast( 401 this=seq_get(args, 0), 402 to=exp.DataType(this=exp.DType.TEXT), 403 ), 404 "TO_HEX": build_hex, 405 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 406 this=exp.Cast( 407 this=seq_get(args, 0), 408 to=exp.DataType(this=exp.DType.TEXT), 409 ), 410 start=exp.Literal.number(1), 411 length=exp.Literal.number(10), 412 ), 413 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 414 "UPPER": build_upper, 415 "UUID": lambda args, dialect: exp.Uuid(is_string=dialect.UUID_IS_STRING_TYPE or None), 416 "VAR_MAP": build_var_map, 417 } 418 419 NO_PAREN_FUNCTIONS: t.ClassVar[dict] = { 420 TokenType.CURRENT_DATE: exp.CurrentDate, 421 TokenType.CURRENT_DATETIME: exp.CurrentDate, 422 TokenType.CURRENT_TIME: exp.CurrentTime, 423 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 424 TokenType.CURRENT_USER: exp.CurrentUser, 425 TokenType.CURRENT_ROLE: exp.CurrentRole, 426 } 427 428 STRUCT_TYPE_TOKENS: t.ClassVar = { 429 TokenType.NESTED, 430 TokenType.OBJECT, 431 TokenType.STRUCT, 432 TokenType.UNION, 433 } 434 435 NESTED_TYPE_TOKENS: t.ClassVar = { 436 TokenType.ARRAY, 437 TokenType.LIST, 438 TokenType.LOWCARDINALITY, 439 TokenType.MAP, 440 TokenType.NULLABLE, 441 TokenType.RANGE, 442 *STRUCT_TYPE_TOKENS, 443 } 444 445 ENUM_TYPE_TOKENS: t.ClassVar = { 446 TokenType.DYNAMIC, 447 TokenType.ENUM, 448 TokenType.ENUM8, 449 TokenType.ENUM16, 450 } 451 452 AGGREGATE_TYPE_TOKENS: t.ClassVar = { 453 TokenType.AGGREGATEFUNCTION, 454 TokenType.SIMPLEAGGREGATEFUNCTION, 455 } 456 457 TYPE_TOKENS: t.ClassVar = { 458 TokenType.BIT, 459 TokenType.BOOLEAN, 460 TokenType.TINYINT, 461 TokenType.UTINYINT, 462 TokenType.SMALLINT, 463 TokenType.USMALLINT, 464 TokenType.INT, 465 TokenType.UINT, 466 TokenType.BIGINT, 467 TokenType.UBIGINT, 468 TokenType.BIGNUM, 469 TokenType.INT128, 470 TokenType.UINT128, 471 TokenType.INT256, 472 TokenType.UINT256, 473 TokenType.MEDIUMINT, 474 TokenType.UMEDIUMINT, 475 TokenType.FIXEDSTRING, 476 TokenType.FLOAT, 477 TokenType.DOUBLE, 478 TokenType.UDOUBLE, 479 TokenType.CHAR, 480 TokenType.NCHAR, 481 TokenType.VARCHAR, 482 TokenType.NVARCHAR, 483 TokenType.BPCHAR, 484 TokenType.TEXT, 485 TokenType.MEDIUMTEXT, 486 TokenType.LONGTEXT, 487 TokenType.BLOB, 488 TokenType.MEDIUMBLOB, 489 TokenType.LONGBLOB, 490 TokenType.BINARY, 491 TokenType.VARBINARY, 492 TokenType.JSON, 493 TokenType.JSONB, 494 TokenType.INTERVAL, 495 TokenType.TINYBLOB, 496 TokenType.TINYTEXT, 497 TokenType.TIME, 498 TokenType.TIMETZ, 499 TokenType.TIME_NS, 500 TokenType.TIMESTAMP, 501 TokenType.TIMESTAMP_S, 502 TokenType.TIMESTAMP_MS, 503 TokenType.TIMESTAMP_NS, 504 TokenType.TIMESTAMPTZ, 505 TokenType.TIMESTAMPLTZ, 506 TokenType.TIMESTAMPNTZ, 507 TokenType.DATETIME, 508 TokenType.DATETIME2, 509 TokenType.DATETIME64, 510 TokenType.SMALLDATETIME, 511 TokenType.DATE, 512 TokenType.DATE32, 513 TokenType.INT4RANGE, 514 TokenType.INT4MULTIRANGE, 515 TokenType.INT8RANGE, 516 TokenType.INT8MULTIRANGE, 517 TokenType.NUMRANGE, 518 TokenType.NUMMULTIRANGE, 519 TokenType.TSRANGE, 520 TokenType.TSMULTIRANGE, 521 TokenType.TSTZRANGE, 522 TokenType.TSTZMULTIRANGE, 523 TokenType.DATERANGE, 524 TokenType.DATEMULTIRANGE, 525 TokenType.DECIMAL, 526 TokenType.DECIMAL32, 527 TokenType.DECIMAL64, 528 TokenType.DECIMAL128, 529 TokenType.DECIMAL256, 530 TokenType.DECFLOAT, 531 TokenType.UDECIMAL, 532 TokenType.BIGDECIMAL, 533 TokenType.UUID, 534 TokenType.GEOGRAPHY, 535 TokenType.GEOGRAPHYPOINT, 536 TokenType.GEOMETRY, 537 TokenType.POINT, 538 TokenType.RING, 539 TokenType.LINESTRING, 540 TokenType.MULTILINESTRING, 541 TokenType.POLYGON, 542 TokenType.MULTIPOLYGON, 543 TokenType.HLLSKETCH, 544 TokenType.HSTORE, 545 TokenType.PSEUDO_TYPE, 546 TokenType.SUPER, 547 TokenType.SERIAL, 548 TokenType.SMALLSERIAL, 549 TokenType.BIGSERIAL, 550 TokenType.XML, 551 TokenType.YEAR, 552 TokenType.USERDEFINED, 553 TokenType.MONEY, 554 TokenType.SMALLMONEY, 555 TokenType.ROWVERSION, 556 TokenType.IMAGE, 557 TokenType.VARIANT, 558 TokenType.VECTOR, 559 TokenType.VOID, 560 TokenType.OBJECT, 561 TokenType.OBJECT_IDENTIFIER, 562 TokenType.INET, 563 TokenType.IPADDRESS, 564 TokenType.IPPREFIX, 565 TokenType.IPV4, 566 TokenType.IPV6, 567 TokenType.UNKNOWN, 568 TokenType.NOTHING, 569 TokenType.NULL, 570 TokenType.NAME, 571 TokenType.TDIGEST, 572 TokenType.DYNAMIC, 573 *ENUM_TYPE_TOKENS, 574 *NESTED_TYPE_TOKENS, 575 *AGGREGATE_TYPE_TOKENS, 576 } 577 578 SIGNED_TO_UNSIGNED_TYPE_TOKEN: t.ClassVar = { 579 TokenType.BIGINT: TokenType.UBIGINT, 580 TokenType.INT: TokenType.UINT, 581 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 582 TokenType.SMALLINT: TokenType.USMALLINT, 583 TokenType.TINYINT: TokenType.UTINYINT, 584 TokenType.DECIMAL: TokenType.UDECIMAL, 585 TokenType.DOUBLE: TokenType.UDOUBLE, 586 } 587 588 SUBQUERY_PREDICATES: t.ClassVar = { 589 TokenType.ANY: exp.Any, 590 TokenType.ALL: exp.All, 591 TokenType.EXISTS: exp.Exists, 592 TokenType.SOME: exp.Any, 593 } 594 595 SUBQUERY_TOKENS: t.ClassVar = { 596 TokenType.SELECT, 597 TokenType.WITH, 598 TokenType.FROM, 599 } 600 601 RESERVED_TOKENS: t.ClassVar = { 602 *Tokenizer.SINGLE_TOKENS.values(), 603 TokenType.SELECT, 604 } - {TokenType.IDENTIFIER} 605 606 DB_CREATABLES: t.ClassVar = { 607 TokenType.DATABASE, 608 TokenType.DICTIONARY, 609 TokenType.FILE_FORMAT, 610 TokenType.MODEL, 611 TokenType.NAMESPACE, 612 TokenType.SCHEMA, 613 TokenType.SEMANTIC_VIEW, 614 TokenType.SEQUENCE, 615 TokenType.SINK, 616 TokenType.SOURCE, 617 TokenType.STAGE, 618 TokenType.STORAGE_INTEGRATION, 619 TokenType.STREAMLIT, 620 TokenType.TABLE, 621 TokenType.TAG, 622 TokenType.VIEW, 623 TokenType.WAREHOUSE, 624 } 625 626 CREATABLES: t.ClassVar = { 627 TokenType.COLUMN, 628 TokenType.CONSTRAINT, 629 TokenType.FOREIGN_KEY, 630 TokenType.FUNCTION, 631 TokenType.INDEX, 632 TokenType.PROCEDURE, 633 TokenType.TRIGGER, 634 *DB_CREATABLES, 635 } 636 637 TRIGGER_EVENTS: t.ClassVar = { 638 TokenType.INSERT, 639 TokenType.UPDATE, 640 TokenType.DELETE, 641 TokenType.TRUNCATE, 642 } 643 644 ALTERABLES: t.ClassVar = { 645 TokenType.INDEX, 646 TokenType.TABLE, 647 TokenType.VIEW, 648 TokenType.SESSION, 649 } 650 651 # Tokens that can represent identifiers 652 ID_VAR_TOKENS: t.ClassVar[set] = { 653 TokenType.ALL, 654 TokenType.ANALYZE, 655 TokenType.ATTACH, 656 TokenType.VAR, 657 TokenType.ANTI, 658 TokenType.APPLY, 659 TokenType.ASC, 660 TokenType.ASOF, 661 TokenType.AUTO_INCREMENT, 662 TokenType.BEGIN, 663 TokenType.BPCHAR, 664 TokenType.CACHE, 665 TokenType.CASE, 666 TokenType.COLLATE, 667 TokenType.COMMAND, 668 TokenType.COMMENT, 669 TokenType.COMMIT, 670 TokenType.CONSTRAINT, 671 TokenType.COPY, 672 TokenType.CUBE, 673 TokenType.CURRENT_SCHEMA, 674 TokenType.DEFAULT, 675 TokenType.DELETE, 676 TokenType.DESC, 677 TokenType.DESCRIBE, 678 TokenType.DETACH, 679 TokenType.DICTIONARY, 680 TokenType.DIV, 681 TokenType.END, 682 TokenType.EXECUTE, 683 TokenType.EXPORT, 684 TokenType.ESCAPE, 685 TokenType.FALSE, 686 TokenType.FIRST, 687 TokenType.FILE, 688 TokenType.FILTER, 689 TokenType.FINAL, 690 TokenType.FORMAT, 691 TokenType.FULL, 692 TokenType.GET, 693 TokenType.IDENTIFIER, 694 TokenType.INOUT, 695 TokenType.IS, 696 TokenType.ISNULL, 697 TokenType.INTERVAL, 698 TokenType.KEEP, 699 TokenType.KILL, 700 TokenType.LEFT, 701 TokenType.LIMIT, 702 TokenType.LOAD, 703 TokenType.LOCK, 704 TokenType.MATCH, 705 TokenType.MERGE, 706 TokenType.NATURAL, 707 TokenType.NEXT, 708 TokenType.OFFSET, 709 TokenType.OPERATOR, 710 TokenType.ORDINALITY, 711 TokenType.OVER, 712 TokenType.OVERLAPS, 713 TokenType.OVERWRITE, 714 TokenType.PARTITION, 715 TokenType.PERCENT, 716 TokenType.PIVOT, 717 TokenType.PRAGMA, 718 TokenType.PUT, 719 TokenType.RANGE, 720 TokenType.RECURSIVE, 721 TokenType.REFERENCES, 722 TokenType.REFRESH, 723 TokenType.RENAME, 724 TokenType.REPLACE, 725 TokenType.RIGHT, 726 TokenType.ROLLUP, 727 TokenType.ROW, 728 TokenType.ROWS, 729 TokenType.SEMI, 730 TokenType.SET, 731 TokenType.SETTINGS, 732 TokenType.SHOW, 733 TokenType.STREAM, 734 TokenType.STREAMLIT, 735 TokenType.TEMPORARY, 736 TokenType.TOP, 737 TokenType.TRUE, 738 TokenType.TRUNCATE, 739 TokenType.UNIQUE, 740 TokenType.UNNEST, 741 TokenType.UNPIVOT, 742 TokenType.UPDATE, 743 TokenType.USE, 744 TokenType.VOLATILE, 745 TokenType.WINDOW, 746 TokenType.CURRENT_CATALOG, 747 TokenType.LOCALTIME, 748 TokenType.LOCALTIMESTAMP, 749 TokenType.SESSION_USER, 750 TokenType.STRAIGHT_JOIN, 751 *ALTERABLES, 752 *CREATABLES, 753 *SUBQUERY_PREDICATES, 754 *TYPE_TOKENS, 755 *NO_PAREN_FUNCTIONS, 756 } - {TokenType.UNION} 757 758 TABLE_ALIAS_TOKENS: t.ClassVar[set] = ID_VAR_TOKENS - { 759 TokenType.ANTI, 760 TokenType.ASOF, 761 TokenType.FULL, 762 TokenType.LEFT, 763 TokenType.LOCK, 764 TokenType.NATURAL, 765 TokenType.RIGHT, 766 TokenType.SEMI, 767 TokenType.WINDOW, 768 } 769 770 ALIAS_TOKENS: t.ClassVar = ID_VAR_TOKENS 771 772 COLON_PLACEHOLDER_TOKENS: t.ClassVar = ID_VAR_TOKENS 773 774 ARRAY_CONSTRUCTORS: t.ClassVar = { 775 "ARRAY": exp.Array, 776 "LIST": exp.List, 777 } 778 779 COMMENT_TABLE_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - {TokenType.IS} 780 781 UPDATE_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - {TokenType.SET} 782 783 TRIM_TYPES: t.ClassVar = {"LEADING", "TRAILING", "BOTH"} 784 785 # Tokens that indicate a simple column reference 786 IDENTIFIER_TOKENS: t.ClassVar[frozenset] = frozenset({TokenType.VAR, TokenType.IDENTIFIER}) 787 788 BRACKETS: t.ClassVar[frozenset] = frozenset({TokenType.L_BRACKET, TokenType.L_BRACE}) 789 790 # Postfix tokens that prevent the bare column fast path 791 COLUMN_POSTFIX_TOKENS: t.ClassVar[frozenset] = frozenset( 792 { 793 TokenType.L_PAREN, 794 TokenType.L_BRACKET, 795 TokenType.L_BRACE, 796 TokenType.COLON, 797 TokenType.JOIN_MARKER, 798 } 799 ) 800 801 TABLE_POSTFIX_TOKENS: t.ClassVar[frozenset] = frozenset( 802 { 803 TokenType.L_PAREN, 804 TokenType.L_BRACKET, 805 TokenType.L_BRACE, 806 TokenType.PIVOT, 807 TokenType.UNPIVOT, 808 TokenType.TABLE_SAMPLE, 809 } 810 ) 811 812 FUNC_TOKENS: t.ClassVar = { 813 TokenType.COLLATE, 814 TokenType.COMMAND, 815 TokenType.CURRENT_DATE, 816 TokenType.CURRENT_DATETIME, 817 TokenType.CURRENT_SCHEMA, 818 TokenType.CURRENT_TIMESTAMP, 819 TokenType.CURRENT_TIME, 820 TokenType.CURRENT_USER, 821 TokenType.CURRENT_CATALOG, 822 TokenType.FILTER, 823 TokenType.FIRST, 824 TokenType.FORMAT, 825 TokenType.GET, 826 TokenType.GLOB, 827 TokenType.IDENTIFIER, 828 TokenType.INDEX, 829 TokenType.ISNULL, 830 TokenType.ILIKE, 831 TokenType.INSERT, 832 TokenType.LIKE, 833 TokenType.LOCALTIME, 834 TokenType.LOCALTIMESTAMP, 835 TokenType.MERGE, 836 TokenType.NEXT, 837 TokenType.OFFSET, 838 TokenType.PRIMARY_KEY, 839 TokenType.RANGE, 840 TokenType.REPLACE, 841 TokenType.RLIKE, 842 TokenType.ROW, 843 TokenType.SESSION_USER, 844 TokenType.UNNEST, 845 TokenType.VAR, 846 TokenType.LEFT, 847 TokenType.RIGHT, 848 TokenType.SEQUENCE, 849 TokenType.DATE, 850 TokenType.DATETIME, 851 TokenType.TABLE, 852 TokenType.TIMESTAMP, 853 TokenType.TIMESTAMPTZ, 854 TokenType.TRUNCATE, 855 TokenType.UTC_DATE, 856 TokenType.UTC_TIME, 857 TokenType.UTC_TIMESTAMP, 858 TokenType.WINDOW, 859 TokenType.XOR, 860 *TYPE_TOKENS, 861 *SUBQUERY_PREDICATES, 862 } 863 864 CONJUNCTION: t.ClassVar[dict[TokenType, type[exp.Expr]]] = { 865 TokenType.AND: exp.And, 866 } 867 868 ASSIGNMENT: t.ClassVar[dict[TokenType, type[exp.Expr]]] = { 869 TokenType.COLON_EQ: exp.PropertyEQ, 870 } 871 872 DISJUNCTION: t.ClassVar[dict[TokenType, type[exp.Expr]]] = { 873 TokenType.OR: exp.Or, 874 } 875 876 EQUALITY: t.ClassVar = { 877 TokenType.EQ: exp.EQ, 878 TokenType.NEQ: exp.NEQ, 879 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 880 } 881 882 COMPARISON: t.ClassVar = { 883 TokenType.GT: exp.GT, 884 TokenType.GTE: exp.GTE, 885 TokenType.LT: exp.LT, 886 TokenType.LTE: exp.LTE, 887 } 888 889 BITWISE: t.ClassVar = { 890 TokenType.AMP: exp.BitwiseAnd, 891 TokenType.CARET: exp.BitwiseXor, 892 TokenType.PIPE: exp.BitwiseOr, 893 } 894 895 TERM: t.ClassVar = { 896 TokenType.DASH: exp.Sub, 897 TokenType.PLUS: exp.Add, 898 TokenType.MOD: exp.Mod, 899 TokenType.COLLATE: exp.Collate, 900 } 901 902 FACTOR: t.ClassVar = { 903 TokenType.DIV: exp.IntDiv, 904 TokenType.LR_ARROW: exp.Distance, 905 TokenType.SLASH: exp.Div, 906 TokenType.STAR: exp.Mul, 907 } 908 909 EXPONENT: t.ClassVar[dict[TokenType, type[exp.Expr]]] = {} 910 911 TIMES: t.ClassVar = { 912 TokenType.TIME, 913 TokenType.TIMETZ, 914 } 915 916 TIMESTAMPS: t.ClassVar = { 917 TokenType.TIMESTAMP, 918 TokenType.TIMESTAMPNTZ, 919 TokenType.TIMESTAMPTZ, 920 TokenType.TIMESTAMPLTZ, 921 *TIMES, 922 } 923 924 SET_OPERATIONS: t.ClassVar = { 925 TokenType.UNION, 926 TokenType.INTERSECT, 927 TokenType.EXCEPT, 928 } 929 930 JOIN_METHODS: t.ClassVar = { 931 TokenType.ASOF, 932 TokenType.NATURAL, 933 TokenType.POSITIONAL, 934 } 935 936 JOIN_SIDES: t.ClassVar = { 937 TokenType.LEFT, 938 TokenType.RIGHT, 939 TokenType.FULL, 940 } 941 942 JOIN_KINDS: t.ClassVar = { 943 TokenType.ANTI, 944 TokenType.CROSS, 945 TokenType.INNER, 946 TokenType.OUTER, 947 TokenType.SEMI, 948 TokenType.STRAIGHT_JOIN, 949 } 950 951 JOIN_HINTS: t.ClassVar[set[str]] = set() 952 953 # Tokens that unambiguously end a table reference on the fast path 954 TABLE_TERMINATORS: t.ClassVar[frozenset] = frozenset( 955 { 956 TokenType.COMMA, 957 TokenType.GROUP_BY, 958 TokenType.HAVING, 959 TokenType.JOIN, 960 TokenType.LIMIT, 961 TokenType.ON, 962 TokenType.ORDER_BY, 963 TokenType.R_PAREN, 964 TokenType.SEMICOLON, 965 TokenType.SENTINEL, 966 TokenType.WHERE, 967 *SET_OPERATIONS, 968 *JOIN_KINDS, 969 *JOIN_METHODS, 970 *JOIN_SIDES, 971 } 972 ) 973 974 LAMBDAS: t.ClassVar = { 975 TokenType.ARROW: lambda self, expressions: self.expression( 976 exp.Lambda( 977 this=self._replace_lambda( 978 self._parse_disjunction(), 979 expressions, 980 ), 981 expressions=expressions, 982 ) 983 ), 984 TokenType.FARROW: lambda self, expressions: self.expression( 985 exp.Kwarg(this=exp.var(expressions[0].name), expression=self._parse_disjunction()) 986 ), 987 } 988 989 # Whether lambda args include type annotations, e.g. TRANSFORM(arr, x INT -> x + 1) in Snowflake 990 TYPED_LAMBDA_ARGS: t.ClassVar[bool] = False 991 992 LAMBDA_ARG_TERMINATORS: t.ClassVar[frozenset] = frozenset({TokenType.COMMA, TokenType.R_PAREN}) 993 994 COLUMN_OPERATORS: t.ClassVar = { 995 TokenType.DOT: None, 996 TokenType.DOTCOLON: lambda self, this, to: self.expression(exp.JSONCast(this=this, to=to)), 997 TokenType.DCOLON: lambda self, this, to: self.build_cast( 998 strict=self.STRICT_CAST, this=this, to=to 999 ), 1000 TokenType.ARROW: lambda self, this, path: self.expression( 1001 exp.JSONExtract( 1002 this=this, 1003 expression=self.dialect.to_json_path(path), 1004 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 1005 ) 1006 ), 1007 TokenType.DARROW: lambda self, this, path: self.expression( 1008 exp.JSONExtractScalar( 1009 this=this, 1010 expression=self.dialect.to_json_path(path), 1011 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 1012 scalar_only=self.dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY, 1013 ) 1014 ), 1015 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 1016 exp.JSONBExtract(this=this, expression=path) 1017 ), 1018 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 1019 exp.JSONBExtractScalar(this=this, expression=path) 1020 ), 1021 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 1022 exp.JSONBContains(this=this, expression=key) 1023 ), 1024 } 1025 1026 CAST_COLUMN_OPERATORS: t.ClassVar = { 1027 TokenType.DOTCOLON, 1028 TokenType.DCOLON, 1029 } 1030 1031 EXPRESSION_PARSERS: t.ClassVar = { 1032 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1033 exp.Column: lambda self: self._parse_column(), 1034 exp.ColumnDef: lambda self: self._parse_column_def(self._parse_column()), 1035 exp.Condition: lambda self: self._parse_disjunction(), 1036 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 1037 exp.Expr: lambda self: self._parse_expression(), 1038 exp.From: lambda self: self._parse_from(joins=True), 1039 exp.GrantPrincipal: lambda self: self._parse_grant_principal(), 1040 exp.GrantPrivilege: lambda self: self._parse_grant_privilege(), 1041 exp.Group: lambda self: self._parse_group(), 1042 exp.Having: lambda self: self._parse_having(), 1043 exp.Hint: lambda self: self._parse_hint_body(), 1044 exp.Identifier: lambda self: self._parse_id_var(), 1045 exp.Join: lambda self: self._parse_join(), 1046 exp.Lambda: lambda self: self._parse_lambda(), 1047 exp.Lateral: lambda self: self._parse_lateral(), 1048 exp.Limit: lambda self: self._parse_limit(), 1049 exp.Offset: lambda self: self._parse_offset(), 1050 exp.Order: lambda self: self._parse_order(), 1051 exp.Ordered: lambda self: self._parse_ordered(), 1052 exp.Properties: lambda self: self._parse_properties(), 1053 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 1054 exp.Qualify: lambda self: self._parse_qualify(), 1055 exp.Returning: lambda self: self._parse_returning(), 1056 exp.Select: lambda self: self._parse_select(), 1057 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 1058 exp.Table: lambda self: self._parse_table_parts(), 1059 exp.TableAlias: lambda self: self._parse_table_alias(), 1060 exp.Tuple: lambda self: self._parse_value(values=False), 1061 exp.Whens: lambda self: self._parse_when_matched(), 1062 exp.Where: lambda self: self._parse_where(), 1063 exp.Window: lambda self: self._parse_named_window(), 1064 exp.With: lambda self: self._parse_with(), 1065 } 1066 1067 STATEMENT_PARSERS: t.ClassVar = { 1068 TokenType.ALTER: lambda self: self._parse_alter(), 1069 TokenType.ANALYZE: lambda self: self._parse_analyze(), 1070 TokenType.BEGIN: lambda self: self._parse_transaction(), 1071 TokenType.CACHE: lambda self: self._parse_cache(), 1072 TokenType.COMMENT: lambda self: self._parse_comment(), 1073 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 1074 TokenType.COPY: lambda self: self._parse_copy(), 1075 TokenType.CREATE: lambda self: self._parse_create(), 1076 TokenType.DELETE: lambda self: self._parse_delete(), 1077 TokenType.DESC: lambda self: self._parse_describe(), 1078 TokenType.DESCRIBE: lambda self: self._parse_describe(), 1079 TokenType.DROP: lambda self: self._parse_drop(), 1080 TokenType.GRANT: lambda self: self._parse_grant(), 1081 TokenType.REVOKE: lambda self: self._parse_revoke(), 1082 TokenType.INSERT: lambda self: self._parse_insert(), 1083 TokenType.KILL: lambda self: self._parse_kill(), 1084 TokenType.LOAD: lambda self: self._parse_load(), 1085 TokenType.MERGE: lambda self: self._parse_merge(), 1086 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 1087 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma(this=self._parse_expression())), 1088 TokenType.REFRESH: lambda self: self._parse_refresh(), 1089 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 1090 TokenType.SET: lambda self: self._parse_set(), 1091 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 1092 TokenType.UNCACHE: lambda self: self._parse_uncache(), 1093 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 1094 TokenType.UPDATE: lambda self: self._parse_update(), 1095 TokenType.USE: lambda self: self._parse_use(), 1096 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 1097 } 1098 1099 UNARY_PARSERS: t.ClassVar = { 1100 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 1101 TokenType.NOT: lambda self: self.expression(exp.Not(this=self._parse_equality())), 1102 TokenType.TILDE: lambda self: self.expression(exp.BitwiseNot(this=self._parse_unary())), 1103 TokenType.DASH: lambda self: self.expression(exp.Neg(this=self._parse_unary())), 1104 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt(this=self._parse_unary())), 1105 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt(this=self._parse_unary())), 1106 } 1107 1108 STRING_PARSERS: t.ClassVar = { 1109 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 1110 exp.RawString(this=token.text), token 1111 ), 1112 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 1113 exp.National(this=token.text), token 1114 ), 1115 TokenType.RAW_STRING: lambda self, token: self.expression( 1116 exp.RawString(this=token.text), token 1117 ), 1118 TokenType.STRING: lambda self, token: self.expression( 1119 exp.Literal(this=token.text, is_string=True), token 1120 ), 1121 TokenType.UNICODE_STRING: lambda self, token: self.expression( 1122 exp.UnicodeString( 1123 this=token.text, escape=self._match_text_seq("UESCAPE") and self._parse_string() 1124 ), 1125 token, 1126 ), 1127 } 1128 1129 NUMERIC_PARSERS: t.ClassVar = { 1130 TokenType.BIT_STRING: lambda self, token: self.expression( 1131 exp.BitString(this=token.text), token 1132 ), 1133 TokenType.BYTE_STRING: lambda self, token: self.expression( 1134 exp.ByteString( 1135 this=token.text, is_bytes=self.dialect.BYTE_STRING_IS_BYTES_TYPE or None 1136 ), 1137 token, 1138 ), 1139 TokenType.HEX_STRING: lambda self, token: self.expression( 1140 exp.HexString( 1141 this=token.text, is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None 1142 ), 1143 token, 1144 ), 1145 TokenType.NUMBER: lambda self, token: self.expression( 1146 exp.Literal(this=token.text, is_string=False), token 1147 ), 1148 } 1149 1150 PRIMARY_PARSERS: t.ClassVar = { 1151 **STRING_PARSERS, 1152 **NUMERIC_PARSERS, 1153 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 1154 TokenType.NULL: lambda self, _: self.expression(exp.Null()), 1155 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean(this=True)), 1156 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean(this=False)), 1157 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 1158 TokenType.STAR: lambda self, _: self._parse_star_ops(), 1159 } 1160 1161 PLACEHOLDER_PARSERS: t.ClassVar = { 1162 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder()), 1163 TokenType.PARAMETER: lambda self: self._parse_parameter(), 1164 TokenType.COLON: lambda self: ( 1165 self.expression(exp.Placeholder(this=self._prev.text)) 1166 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 1167 else None 1168 ), 1169 } 1170 1171 RANGE_PARSERS: t.ClassVar = { 1172 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 1173 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 1174 TokenType.GLOB: binary_range_parser(exp.Glob), 1175 TokenType.ILIKE: binary_range_parser(exp.ILike), 1176 TokenType.IN: lambda self, this: self._parse_in(this), 1177 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 1178 TokenType.IS: lambda self, this: self._parse_is(this), 1179 TokenType.LIKE: binary_range_parser(exp.Like), 1180 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 1181 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 1182 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 1183 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 1184 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 1185 TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys), 1186 TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys), 1187 TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath), 1188 TokenType.ADJACENT: binary_range_parser(exp.Adjacent), 1189 TokenType.OPERATOR: lambda self, this: self._parse_operator(this), 1190 TokenType.AMP_LT: binary_range_parser(exp.ExtendsLeft), 1191 TokenType.AMP_GT: binary_range_parser(exp.ExtendsRight), 1192 } 1193 1194 PIPE_SYNTAX_TRANSFORM_PARSERS: t.ClassVar = { 1195 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 1196 "AS": lambda self, query: self._build_pipe_cte( 1197 query, [exp.Star()], self._parse_table_alias() 1198 ), 1199 "DISTINCT": lambda self, query: self._advance() or query.distinct(copy=False), 1200 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 1201 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 1202 "ORDER BY": lambda self, query: query.order_by( 1203 self._parse_order(), append=False, copy=False 1204 ), 1205 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 1206 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 1207 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 1208 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 1209 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 1210 } 1211 1212 PROPERTY_PARSERS: t.ClassVar[dict[str, t.Callable]] = { 1213 "ALLOWED_VALUES": lambda self: self.expression( 1214 exp.AllowedValuesProperty(expressions=self._parse_csv(self._parse_primary)) 1215 ), 1216 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 1217 "AUTO": lambda self: self._parse_auto_property(), 1218 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 1219 "BACKUP": lambda self: self.expression( 1220 exp.BackupProperty(this=self._parse_var(any_token=True)) 1221 ), 1222 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 1223 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 1224 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 1225 "CHECKSUM": lambda self: self._parse_checksum(), 1226 "CLUSTER BY": lambda self: self._parse_cluster(), 1227 "CLUSTERED": lambda self: self._parse_clustered_by(), 1228 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 1229 exp.CollateProperty, **kwargs 1230 ), 1231 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 1232 "CONTAINS": lambda self: self._parse_contains_property(), 1233 "COPY": lambda self: self._parse_copy_property(), 1234 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 1235 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 1236 "DEFINER": lambda self: self._parse_definer(), 1237 "DETERMINISTIC": lambda self: self.expression( 1238 exp.StabilityProperty(this=exp.Literal.string("IMMUTABLE")) 1239 ), 1240 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 1241 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 1242 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty()), 1243 "DISTKEY": lambda self: self._parse_distkey(), 1244 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 1245 "EMPTY": lambda self: self.expression(exp.EmptyProperty()), 1246 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 1247 "ENVIRONMENT": lambda self: self.expression( 1248 exp.EnviromentProperty(expressions=self._parse_wrapped_csv(self._parse_assignment)) 1249 ), 1250 "HANDLER": lambda self: self._parse_property_assignment(exp.HandlerProperty), 1251 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 1252 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty()), 1253 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1254 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1255 "FREESPACE": lambda self: self._parse_freespace(), 1256 "GLOBAL": lambda self: self.expression(exp.GlobalProperty()), 1257 "HEAP": lambda self: self.expression(exp.HeapProperty()), 1258 "ICEBERG": lambda self: self.expression(exp.IcebergProperty()), 1259 "IMMUTABLE": lambda self: self.expression( 1260 exp.StabilityProperty(this=exp.Literal.string("IMMUTABLE")) 1261 ), 1262 "INHERITS": lambda self: self.expression( 1263 exp.InheritsProperty(expressions=self._parse_wrapped_csv(self._parse_table)) 1264 ), 1265 "INPUT": lambda self: self.expression(exp.InputModelProperty(this=self._parse_schema())), 1266 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1267 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1268 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1269 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1270 "LIKE": lambda self: self._parse_create_like(), 1271 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1272 "LOCK": lambda self: self._parse_locking(), 1273 "LOCKING": lambda self: self._parse_locking(), 1274 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1275 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty()), 1276 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1277 "MODIFIES": lambda self: self._parse_modifies_property(), 1278 "MULTISET": lambda self: self.expression(exp.SetProperty(multi=True)), 1279 "NO": lambda self: self._parse_no_property(), 1280 "ON": lambda self: self._parse_on_property(), 1281 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1282 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty(this=self._parse_schema())), 1283 "PARTITION": lambda self: self._parse_partitioned_of(), 1284 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1285 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1286 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1287 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1288 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1289 "READS": lambda self: self._parse_reads_property(), 1290 "REMOTE": lambda self: self._parse_remote_with_connection(), 1291 "RETURNS": lambda self: self._parse_returns(), 1292 "STRICT": lambda self: self.expression(exp.StrictProperty()), 1293 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty()), 1294 "ROW": lambda self: self._parse_row(), 1295 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1296 "SAMPLE": lambda self: self.expression( 1297 exp.SampleProperty(this=self._match_text_seq("BY") and self._parse_bitwise()) 1298 ), 1299 "SECURE": lambda self: self.expression(exp.SecureProperty()), 1300 "SECURITY": lambda self: self._parse_sql_security(), 1301 "SQL SECURITY": lambda self: self._parse_sql_security(), 1302 "SET": lambda self: self.expression(exp.SetProperty(multi=False)), 1303 "SETTINGS": lambda self: self._parse_settings_property(), 1304 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1305 "SORTKEY": lambda self: self._parse_sortkey(), 1306 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1307 "STABLE": lambda self: self.expression( 1308 exp.StabilityProperty(this=exp.Literal.string("STABLE")) 1309 ), 1310 "STORED": lambda self: self._parse_stored(), 1311 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1312 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1313 "TEMP": lambda self: self.expression(exp.TemporaryProperty()), 1314 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty()), 1315 "TO": lambda self: self._parse_to_table(), 1316 "TRANSIENT": lambda self: self.expression(exp.TransientProperty()), 1317 "TRANSFORM": lambda self: self.expression( 1318 exp.TransformModelProperty(expressions=self._parse_wrapped_csv(self._parse_expression)) 1319 ), 1320 "TTL": lambda self: self._parse_ttl(), 1321 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1322 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty()), 1323 "VOLATILE": lambda self: self._parse_volatile_property(), 1324 "WITH": lambda self: self._parse_with_property(), 1325 } 1326 1327 CONSTRAINT_PARSERS: t.ClassVar = { 1328 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1329 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1330 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint(not_=False)), 1331 "CHARACTER SET": lambda self: self.expression( 1332 exp.CharacterSetColumnConstraint(this=self._parse_var_or_string()) 1333 ), 1334 "CHECK": lambda self: self._parse_check_constraint(), 1335 "COLLATE": lambda self: self.expression( 1336 exp.CollateColumnConstraint(this=self._parse_identifier() or self._parse_column()) 1337 ), 1338 "COMMENT": lambda self: self.expression( 1339 exp.CommentColumnConstraint(this=self._parse_string()) 1340 ), 1341 "COMPRESS": lambda self: self._parse_compress(), 1342 "CLUSTERED": lambda self: self.expression( 1343 exp.ClusteredColumnConstraint(this=self._parse_wrapped_csv(self._parse_ordered)) 1344 ), 1345 "NONCLUSTERED": lambda self: self.expression( 1346 exp.NonClusteredColumnConstraint(this=self._parse_wrapped_csv(self._parse_ordered)) 1347 ), 1348 "DEFAULT": lambda self: self.expression( 1349 exp.DefaultColumnConstraint(this=self._parse_bitwise()) 1350 ), 1351 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint(this=self._parse_var())), 1352 "EPHEMERAL": lambda self: self.expression( 1353 exp.EphemeralColumnConstraint(this=self._parse_bitwise()) 1354 ), 1355 "EXCLUDE": lambda self: self.expression( 1356 exp.ExcludeColumnConstraint(this=self._parse_index_params()) 1357 ), 1358 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1359 "FORMAT": lambda self: self.expression( 1360 exp.DateFormatColumnConstraint(this=self._parse_var_or_string()) 1361 ), 1362 "GENERATED": lambda self: self._parse_generated_as_identity(), 1363 "IDENTITY": lambda self: self._parse_auto_increment(), 1364 "INLINE": lambda self: self._parse_inline(), 1365 "LIKE": lambda self: self._parse_create_like(), 1366 "NOT": lambda self: self._parse_not_constraint(), 1367 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint(allow_null=True)), 1368 "ON": lambda self: ( 1369 ( 1370 self._match(TokenType.UPDATE) 1371 and self.expression(exp.OnUpdateColumnConstraint(this=self._parse_function())) 1372 ) 1373 or self.expression(exp.OnProperty(this=self._parse_id_var())) 1374 ), 1375 "PATH": lambda self: self.expression(exp.PathColumnConstraint(this=self._parse_string())), 1376 "PERIOD": lambda self: self._parse_period_for_system_time(), 1377 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1378 "REFERENCES": lambda self: self._parse_references(match=False), 1379 "TITLE": lambda self: self.expression( 1380 exp.TitleColumnConstraint(this=self._parse_var_or_string()) 1381 ), 1382 "TTL": lambda self: self.expression(exp.MergeTreeTTL(expressions=[self._parse_bitwise()])), 1383 "UNIQUE": lambda self: self._parse_unique(), 1384 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint()), 1385 "WITH": lambda self: self.expression( 1386 exp.Properties(expressions=self._parse_wrapped_properties()) 1387 ), 1388 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1389 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1390 } 1391 1392 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expr | None: 1393 if not self._match(TokenType.L_PAREN, advance=False): 1394 # Partitioning by bucket or truncate follows the syntax: 1395 # PARTITION BY (BUCKET(..) | TRUNCATE(..)) 1396 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1397 self._retreat(self._index - 1) 1398 return None 1399 1400 klass = ( 1401 exp.PartitionedByBucket 1402 if self._prev.text.upper() == "BUCKET" 1403 else exp.PartitionByTruncate 1404 ) 1405 1406 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1407 this, expression = seq_get(args, 0), seq_get(args, 1) 1408 1409 if isinstance(this, exp.Literal): 1410 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1411 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1412 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1413 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1414 # 1415 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1416 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1417 this, expression = expression, this 1418 1419 return self.expression(klass(this=this, expression=expression)) 1420 1421 ALTER_PARSERS: t.ClassVar = { 1422 "ADD": lambda self: self._parse_alter_table_add(), 1423 "AS": lambda self: self._parse_select(), 1424 "ALTER": lambda self: self._parse_alter_table_alter(), 1425 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1426 "DELETE": lambda self: self.expression(exp.Delete(where=self._parse_where())), 1427 "DROP": lambda self: self._parse_alter_table_drop(), 1428 "RENAME": lambda self: self._parse_alter_table_rename(), 1429 "SET": lambda self: self._parse_alter_table_set(), 1430 "SWAP": lambda self: self.expression( 1431 exp.SwapTable(this=self._match(TokenType.WITH) and self._parse_table(schema=True)) 1432 ), 1433 } 1434 1435 ALTER_ALTER_PARSERS: t.ClassVar = { 1436 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1437 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1438 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1439 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1440 } 1441 1442 SCHEMA_UNNAMED_CONSTRAINTS: t.ClassVar = { 1443 "CHECK", 1444 "EXCLUDE", 1445 "FOREIGN KEY", 1446 "LIKE", 1447 "PERIOD", 1448 "PRIMARY KEY", 1449 "UNIQUE", 1450 "BUCKET", 1451 "TRUNCATE", 1452 } 1453 1454 NO_PAREN_FUNCTION_PARSERS: t.ClassVar = { 1455 "ANY": lambda self: self.expression(exp.Any(this=self._parse_bitwise())), 1456 "CASE": lambda self: self._parse_case(), 1457 "CONNECT_BY_ROOT": lambda self: self.expression( 1458 exp.ConnectByRoot(this=self._parse_column()) 1459 ), 1460 "IF": lambda self: self._parse_if(), 1461 } 1462 1463 INVALID_FUNC_NAME_TOKENS: t.ClassVar = { 1464 TokenType.IDENTIFIER, 1465 TokenType.STRING, 1466 } 1467 1468 FUNCTIONS_WITH_ALIASED_ARGS: t.ClassVar = {"STRUCT"} 1469 1470 KEY_VALUE_DEFINITIONS: t.ClassVar = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1471 1472 FUNCTION_PARSERS: t.ClassVar[dict[str, t.Callable]] = { 1473 **{ 1474 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1475 }, 1476 **{ 1477 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1478 }, 1479 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1480 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1481 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1482 "CHAR": lambda self: self._parse_char(), 1483 "CHR": lambda self: self._parse_char(), 1484 "DECODE": lambda self: self._parse_decode(), 1485 "EXTRACT": lambda self: self._parse_extract(), 1486 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1487 "GAP_FILL": lambda self: self._parse_gap_fill(), 1488 "INITCAP": lambda self: self._parse_initcap(), 1489 "JSON_OBJECT": lambda self: self._parse_json_object(), 1490 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1491 "JSON_TABLE": lambda self: self._parse_json_table(), 1492 "MATCH": lambda self: self._parse_match_against(), 1493 "NORMALIZE": lambda self: self._parse_normalize(), 1494 "OPENJSON": lambda self: self._parse_open_json(), 1495 "OVERLAY": lambda self: self._parse_overlay(), 1496 "POSITION": lambda self: self._parse_position(), 1497 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1498 "STRING_AGG": lambda self: self._parse_string_agg(), 1499 "SUBSTRING": lambda self: self._parse_substring(), 1500 "TRIM": lambda self: self._parse_trim(), 1501 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1502 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1503 "XMLELEMENT": lambda self: self._parse_xml_element(), 1504 "XMLTABLE": lambda self: self._parse_xml_table(), 1505 } 1506 1507 QUERY_MODIFIER_PARSERS: t.ClassVar = { 1508 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1509 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1510 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1511 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1512 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1513 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1514 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1515 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1516 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1517 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1518 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1519 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1520 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1521 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1522 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1523 TokenType.CLUSTER_BY: lambda self: ( 1524 "cluster", 1525 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1526 ), 1527 TokenType.DISTRIBUTE_BY: lambda self: ( 1528 "distribute", 1529 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1530 ), 1531 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1532 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1533 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1534 } 1535 QUERY_MODIFIER_TOKENS: t.ClassVar = set(QUERY_MODIFIER_PARSERS) 1536 1537 SET_PARSERS: t.ClassVar = { 1538 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1539 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1540 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1541 "TRANSACTION": lambda self: self._parse_set_transaction(), 1542 } 1543 1544 SHOW_PARSERS: t.ClassVar[dict[str, t.Callable]] = {} 1545 1546 TYPE_LITERAL_PARSERS: t.ClassVar = { 1547 exp.DType.JSON: lambda self, this, _: self.expression(exp.ParseJSON(this=this)), 1548 } 1549 1550 TYPE_CONVERTERS: t.ClassVar[dict[exp.DType, t.Callable[[exp.DataType], exp.DataType]]] = {} 1551 1552 DDL_SELECT_TOKENS: t.ClassVar = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1553 1554 PRE_VOLATILE_TOKENS: t.ClassVar = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1555 1556 TRANSACTION_KIND: t.ClassVar = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1557 TRANSACTION_CHARACTERISTICS: t.ClassVar[OPTIONS_TYPE] = { 1558 "ISOLATION": ( 1559 ("LEVEL", "REPEATABLE", "READ"), 1560 ("LEVEL", "READ", "COMMITTED"), 1561 ("LEVEL", "READ", "UNCOMITTED"), 1562 ("LEVEL", "SERIALIZABLE"), 1563 ), 1564 "READ": ("WRITE", "ONLY"), 1565 } 1566 1567 CONFLICT_ACTIONS: t.ClassVar[OPTIONS_TYPE] = { 1568 **dict.fromkeys(("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()), 1569 "DO": ("NOTHING", "UPDATE"), 1570 } 1571 1572 TRIGGER_TIMING: t.ClassVar[OPTIONS_TYPE] = { 1573 "INSTEAD": (("OF",),), 1574 "BEFORE": tuple(), 1575 "AFTER": tuple(), 1576 } 1577 1578 TRIGGER_DEFERRABLE: t.ClassVar[OPTIONS_TYPE] = { 1579 "NOT": (("DEFERRABLE",),), 1580 "DEFERRABLE": tuple(), 1581 } 1582 1583 CREATE_SEQUENCE: t.ClassVar[OPTIONS_TYPE] = { 1584 "SCALE": ("EXTEND", "NOEXTEND"), 1585 "SHARD": ("EXTEND", "NOEXTEND"), 1586 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1587 **dict.fromkeys( 1588 ( 1589 "SESSION", 1590 "GLOBAL", 1591 "KEEP", 1592 "NOKEEP", 1593 "ORDER", 1594 "NOORDER", 1595 "NOCACHE", 1596 "CYCLE", 1597 "NOCYCLE", 1598 "NOMINVALUE", 1599 "NOMAXVALUE", 1600 "NOSCALE", 1601 "NOSHARD", 1602 ), 1603 tuple(), 1604 ), 1605 } 1606 1607 ISOLATED_LOADING_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {"FOR": ("ALL", "INSERT", "NONE")} 1608 1609 USABLES: t.ClassVar[OPTIONS_TYPE] = dict.fromkeys( 1610 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1611 ) 1612 1613 CAST_ACTIONS: t.ClassVar[OPTIONS_TYPE] = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1614 1615 SCHEMA_BINDING_OPTIONS: t.ClassVar[OPTIONS_TYPE] = { 1616 "TYPE": ("EVOLUTION",), 1617 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1618 } 1619 1620 PROCEDURE_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {} 1621 1622 EXECUTE_AS_OPTIONS: t.ClassVar[OPTIONS_TYPE] = dict.fromkeys( 1623 ("CALLER", "SELF", "OWNER"), tuple() 1624 ) 1625 1626 KEY_CONSTRAINT_OPTIONS: t.ClassVar[OPTIONS_TYPE] = { 1627 "NOT": ("ENFORCED",), 1628 "MATCH": ( 1629 "FULL", 1630 "PARTIAL", 1631 "SIMPLE", 1632 ), 1633 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1634 "USING": ( 1635 "BTREE", 1636 "HASH", 1637 ), 1638 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1639 } 1640 1641 WINDOW_EXCLUDE_OPTIONS: t.ClassVar[OPTIONS_TYPE] = { 1642 "NO": ("OTHERS",), 1643 "CURRENT": ("ROW",), 1644 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1645 } 1646 1647 INSERT_ALTERNATIVES: t.ClassVar = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1648 1649 CLONE_KEYWORDS: t.ClassVar = {"CLONE", "COPY"} 1650 HISTORICAL_DATA_PREFIX: t.ClassVar = {"AT", "BEFORE", "END"} 1651 HISTORICAL_DATA_KIND: t.ClassVar = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1652 1653 OPCLASS_FOLLOW_KEYWORDS: t.ClassVar = {"ASC", "DESC", "NULLS", "WITH"} 1654 1655 OPTYPE_FOLLOW_TOKENS: t.ClassVar = {TokenType.COMMA, TokenType.R_PAREN} 1656 1657 TABLE_INDEX_HINT_TOKENS: t.ClassVar = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1658 1659 VIEW_ATTRIBUTES: t.ClassVar = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1660 1661 WINDOW_ALIAS_TOKENS: t.ClassVar = ID_VAR_TOKENS - {TokenType.RANGE, TokenType.ROWS} 1662 WINDOW_BEFORE_PAREN_TOKENS: t.ClassVar = {TokenType.OVER} 1663 WINDOW_SIDES: t.ClassVar = {"FOLLOWING", "PRECEDING"} 1664 1665 JSON_KEY_VALUE_SEPARATOR_TOKENS: t.ClassVar = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1666 1667 FETCH_TOKENS: t.ClassVar = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1668 1669 ADD_CONSTRAINT_TOKENS: t.ClassVar = { 1670 TokenType.CONSTRAINT, 1671 TokenType.FOREIGN_KEY, 1672 TokenType.INDEX, 1673 TokenType.KEY, 1674 TokenType.PRIMARY_KEY, 1675 TokenType.UNIQUE, 1676 } 1677 1678 DISTINCT_TOKENS: t.ClassVar = {TokenType.DISTINCT} 1679 1680 UNNEST_OFFSET_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1681 1682 SELECT_START_TOKENS: t.ClassVar = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1683 1684 COPY_INTO_VARLEN_OPTIONS: t.ClassVar = { 1685 "FILE_FORMAT", 1686 "COPY_OPTIONS", 1687 "FORMAT_OPTIONS", 1688 "CREDENTIAL", 1689 } 1690 1691 IS_JSON_PREDICATE_KIND: t.ClassVar = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1692 1693 ODBC_DATETIME_LITERALS: t.ClassVar[dict[str, type[exp.Expr]]] = {} 1694 1695 ON_CONDITION_TOKENS: t.ClassVar = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1696 1697 PRIVILEGE_FOLLOW_TOKENS: t.ClassVar = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1698 1699 # The style options for the DESCRIBE statement 1700 DESCRIBE_STYLES: t.ClassVar = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1701 1702 SET_ASSIGNMENT_DELIMITERS: t.ClassVar = {"=", ":=", "TO"} 1703 1704 # The style options for the ANALYZE statement 1705 ANALYZE_STYLES: t.ClassVar = { 1706 "BUFFER_USAGE_LIMIT", 1707 "FULL", 1708 "LOCAL", 1709 "NO_WRITE_TO_BINLOG", 1710 "SAMPLE", 1711 "SKIP_LOCKED", 1712 "VERBOSE", 1713 } 1714 1715 ANALYZE_EXPRESSION_PARSERS: t.ClassVar = { 1716 "ALL": lambda self: self._parse_analyze_columns(), 1717 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1718 "DELETE": lambda self: self._parse_analyze_delete(), 1719 "DROP": lambda self: self._parse_analyze_histogram(), 1720 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1721 "LIST": lambda self: self._parse_analyze_list(), 1722 "PREDICATE": lambda self: self._parse_analyze_columns(), 1723 "UPDATE": lambda self: self._parse_analyze_histogram(), 1724 "VALIDATE": lambda self: self._parse_analyze_validate(), 1725 } 1726 1727 PARTITION_KEYWORDS: t.ClassVar = {"PARTITION", "SUBPARTITION"} 1728 1729 AMBIGUOUS_ALIAS_TOKENS: t.ClassVar = (TokenType.LIMIT, TokenType.OFFSET) 1730 1731 OPERATION_MODIFIERS: t.ClassVar[set[str]] = set() 1732 1733 RECURSIVE_CTE_SEARCH_KIND: t.ClassVar = {"BREADTH", "DEPTH", "CYCLE"} 1734 1735 SECURITY_PROPERTY_KEYWORDS: t.ClassVar = {"DEFINER", "INVOKER", "NONE"} 1736 1737 MODIFIABLES: t.ClassVar = (exp.Query, exp.Table, exp.TableFromRows, exp.Values) 1738 1739 STRICT_CAST: t.ClassVar = True 1740 1741 PREFIXED_PIVOT_COLUMNS: t.ClassVar = False 1742 IDENTIFY_PIVOT_STRINGS: t.ClassVar = False 1743 1744 LOG_DEFAULTS_TO_LN: t.ClassVar = False 1745 1746 # Whether the table sample clause expects CSV syntax 1747 TABLESAMPLE_CSV: t.ClassVar = False 1748 1749 # The default method used for table sampling 1750 DEFAULT_SAMPLING_METHOD: t.ClassVar[str | None] = None 1751 1752 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1753 SET_REQUIRES_ASSIGNMENT_DELIMITER: t.ClassVar = True 1754 1755 # Whether the TRIM function expects the characters to trim as its first argument 1756 TRIM_PATTERN_FIRST: t.ClassVar = False 1757 1758 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1759 STRING_ALIASES: t.ClassVar = False 1760 1761 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1762 MODIFIERS_ATTACHED_TO_SET_OP: t.ClassVar = True 1763 SET_OP_MODIFIERS: t.ClassVar = {"order", "limit", "offset"} 1764 1765 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1766 NO_PAREN_IF_COMMANDS: t.ClassVar = True 1767 1768 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1769 JSON_ARROWS_REQUIRE_JSON_TYPE: t.ClassVar = False 1770 1771 # Whether the `:` operator is used to extract a value from a VARIANT column 1772 COLON_IS_VARIANT_EXTRACT: t.ClassVar = False 1773 1774 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1775 # If this is True and '(' is not found, the keyword will be treated as an identifier 1776 VALUES_FOLLOWED_BY_PAREN: t.ClassVar = True 1777 1778 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1779 SUPPORTS_IMPLICIT_UNNEST: t.ClassVar = False 1780 1781 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1782 INTERVAL_SPANS: t.ClassVar = True 1783 1784 # Whether a PARTITION clause can follow a table reference 1785 SUPPORTS_PARTITION_SELECTION: t.ClassVar = False 1786 1787 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1788 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT: t.ClassVar = True 1789 1790 # Whether the 'AS' keyword is optional in the CTE definition syntax 1791 OPTIONAL_ALIAS_TOKEN_CTE: t.ClassVar = True 1792 1793 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1794 ALTER_RENAME_REQUIRES_COLUMN: t.ClassVar = True 1795 1796 # Whether Alter statements are allowed to contain Partition specifications 1797 ALTER_TABLE_PARTITIONS: t.ClassVar = False 1798 1799 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1800 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1801 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1802 # as BigQuery, where all joins have the same precedence. 1803 JOINS_HAVE_EQUAL_PRECEDENCE: t.ClassVar = False 1804 1805 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1806 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR: t.ClassVar = False 1807 1808 # Whether map literals support arbitrary expressions as keys. 1809 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1810 # When False, keys are typically restricted to identifiers. 1811 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: t.ClassVar = False 1812 1813 # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g this 1814 # is true for Snowflake but not for BigQuery which can also process strings 1815 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION: t.ClassVar = False 1816 1817 # Dialects like Databricks support JOINS without join criteria 1818 # Adding an ON TRUE, makes transpilation semantically correct for other dialects 1819 ADD_JOIN_ON_TRUE: t.ClassVar = False 1820 1821 # Whether INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]' 1822 # can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND` 1823 SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT: t.ClassVar = False 1824 1825 SHOW_TRIE: t.ClassVar[dict] = new_trie(key.split(" ") for key in SHOW_PARSERS) 1826 SET_TRIE: t.ClassVar[dict] = new_trie(key.split(" ") for key in SET_PARSERS) 1827 1828 def __init__( 1829 self, 1830 error_level: ErrorLevel | None = None, 1831 error_message_context: int = 100, 1832 max_errors: int = 3, 1833 max_nodes: int = -1, 1834 dialect: DialectType = None, 1835 ): 1836 self.error_level: ErrorLevel = error_level or ErrorLevel.IMMEDIATE 1837 self.error_message_context: int = error_message_context 1838 self.max_errors: int = max_errors 1839 self.max_nodes: int = max_nodes 1840 self.dialect: t.Any = _resolve_dialect(dialect) 1841 self.sql: str = "" 1842 self.errors: list[ParseError] = [] 1843 self._tokens: list[Token] = [] 1844 self._tokens_size: i64 = 0 1845 self._index: i64 = 0 1846 self._curr: Token = SENTINEL_NONE 1847 self._next: Token = SENTINEL_NONE 1848 self._prev: Token = SENTINEL_NONE 1849 self._prev_comments: list[str] = [] 1850 self._pipe_cte_counter: int = 0 1851 self._chunks: list[list[Token]] = [] 1852 self._chunk_index: i64 = 0 1853 self._node_count: int = 0 1854 1855 def reset(self) -> None: 1856 self.sql = "" 1857 self.errors = [] 1858 self._tokens = [] 1859 self._tokens_size = 0 1860 self._index = 0 1861 self._curr = SENTINEL_NONE 1862 self._next = SENTINEL_NONE 1863 self._prev = SENTINEL_NONE 1864 self._prev_comments = [] 1865 self._pipe_cte_counter = 0 1866 self._chunks = [] 1867 self._chunk_index = 0 1868 self._node_count = 0 1869 1870 def _advance(self, times: i64 = 1) -> None: 1871 index = self._index + times 1872 self._index = index 1873 tokens = self._tokens 1874 size = self._tokens_size 1875 self._curr = tokens[index] if index < size else SENTINEL_NONE 1876 self._next = tokens[index + 1] if index + 1 < size else SENTINEL_NONE 1877 1878 if index > 0: 1879 prev = tokens[index - 1] 1880 self._prev = prev 1881 self._prev_comments = prev.comments 1882 else: 1883 self._prev = SENTINEL_NONE 1884 self._prev_comments = [] 1885 1886 def _advance_chunk(self) -> None: 1887 self._index = -1 1888 self._tokens = self._chunks[self._chunk_index] 1889 self._tokens_size = i64(len(self._tokens)) 1890 self._chunk_index += 1 1891 self._advance() 1892 1893 def _retreat(self, index: i64) -> None: 1894 if index != self._index: 1895 self._advance(index - self._index) 1896 1897 def _add_comments(self, expression: exp.Expr | None) -> None: 1898 if expression and self._prev_comments: 1899 expression.add_comments(self._prev_comments) 1900 self._prev_comments = [] 1901 1902 def _match( 1903 self, token_type: TokenType, advance: bool = True, expression: exp.Expr | None = None 1904 ) -> bool: 1905 if self._curr.token_type == token_type: 1906 if advance: 1907 self._advance() 1908 self._add_comments(expression) 1909 return True 1910 return False 1911 1912 def _match_set(self, types: t.Collection[TokenType], advance: bool = True) -> bool: 1913 if self._curr.token_type in types: 1914 if advance: 1915 self._advance() 1916 return True 1917 return False 1918 1919 def _match_pair( 1920 self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True 1921 ) -> bool: 1922 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 1923 if advance: 1924 self._advance(2) 1925 return True 1926 return False 1927 1928 def _match_texts(self, texts: t.Collection[str], advance: bool = True) -> bool: 1929 if self._curr.token_type != TokenType.STRING and self._curr.text.upper() in texts: 1930 if advance: 1931 self._advance() 1932 return True 1933 return False 1934 1935 def _match_text_seq(self, *texts: str, advance: bool = True) -> bool: 1936 index = self._index 1937 string_type = TokenType.STRING 1938 for text in texts: 1939 if self._curr.token_type != string_type and self._curr.text.upper() == text: 1940 self._advance() 1941 else: 1942 self._retreat(index) 1943 return False 1944 1945 if not advance: 1946 self._retreat(index) 1947 1948 return True 1949 1950 def _is_connected(self) -> bool: 1951 prev = self._prev 1952 curr = self._curr 1953 return bool(prev and curr and prev.end + 1 == curr.start) 1954 1955 def _find_sql(self, start: Token, end: Token) -> str: 1956 return self.sql[start.start : end.end + 1] 1957 1958 def raise_error(self, message: str, token: Token = SENTINEL_NONE) -> None: 1959 token = token or self._curr or self._prev or Token.string("") 1960 formatted_sql, start_context, highlight, end_context = highlight_sql( 1961 sql=self.sql, 1962 positions=[(token.start, token.end)], 1963 context_length=self.error_message_context, 1964 ) 1965 formatted_message = f"{message}. Line {token.line}, Col: {token.col}.\n {formatted_sql}" 1966 1967 error = ParseError.new( 1968 formatted_message, 1969 description=message, 1970 line=token.line, 1971 col=token.col, 1972 start_context=start_context, 1973 highlight=highlight, 1974 end_context=end_context, 1975 ) 1976 1977 if self.error_level == ErrorLevel.IMMEDIATE: 1978 raise error 1979 1980 self.errors.append(error) 1981 1982 def validate_expression(self, expression: E, args: list | None = None) -> E: 1983 if self.max_nodes > -1: 1984 self._node_count += 1 1985 if self._node_count > self.max_nodes: 1986 self.raise_error(f"Maximum number of AST nodes ({self.max_nodes}) exceeded") 1987 if self.error_level != ErrorLevel.IGNORE: 1988 for error_message in expression.error_messages(args): 1989 self.raise_error(error_message) 1990 return expression 1991 1992 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> T | None: 1993 index = self._index 1994 error_level = self.error_level 1995 this: T | None = None 1996 1997 self.error_level = ErrorLevel.IMMEDIATE 1998 try: 1999 this = parse_method() 2000 except ParseError: 2001 this = None 2002 finally: 2003 if not this or retreat: 2004 self._retreat(index) 2005 self.error_level = error_level 2006 2007 return this 2008 2009 def parse(self, raw_tokens: list[Token], sql: str) -> list[exp.Expr | None]: 2010 """ 2011 Parses a list of tokens and returns a list of syntax trees, one tree 2012 per parsed SQL statement. 2013 2014 Args: 2015 raw_tokens: The list of tokens. 2016 sql: The original SQL string. 2017 2018 Returns: 2019 The list of the produced syntax trees. 2020 """ 2021 return self._parse( 2022 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 2023 ) 2024 2025 def parse_into( 2026 self, 2027 expression_types: exp.IntoType, 2028 raw_tokens: list[Token], 2029 sql: str | None = None, 2030 ) -> list[exp.Expr | None]: 2031 """ 2032 Parses a list of tokens into a given Expr type. If a collection of Expr 2033 types is given instead, this method will try to parse the token list into each one 2034 of them, stopping at the first for which the parsing succeeds. 2035 2036 Args: 2037 expression_types: The expression type(s) to try and parse the token list into. 2038 raw_tokens: The list of tokens. 2039 sql: The original SQL string, used to produce helpful debug messages. 2040 2041 Returns: 2042 The target Expr. 2043 """ 2044 errors = [] 2045 for expression_type in ensure_list(expression_types): 2046 parser = self.EXPRESSION_PARSERS.get(t.cast(type[exp.Expr], expression_type)) 2047 if not parser: 2048 raise TypeError(f"No parser registered for {expression_type}") 2049 2050 try: 2051 return self._parse(parser, raw_tokens, sql) 2052 except ParseError as e: 2053 e.errors[0]["into_expression"] = expression_type 2054 errors.append(e) 2055 2056 raise ParseError( 2057 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 2058 errors=merge_errors(errors), 2059 ) from errors[-1] 2060 2061 def check_errors(self) -> None: 2062 """Logs or raises any found errors, depending on the chosen error level setting.""" 2063 if self.error_level == ErrorLevel.WARN: 2064 for error in self.errors: 2065 logger.error(str(error)) 2066 elif self.error_level == ErrorLevel.RAISE and self.errors: 2067 raise ParseError( 2068 concat_messages(self.errors, self.max_errors), 2069 errors=merge_errors(self.errors), 2070 ) 2071 2072 def expression( 2073 self, 2074 instance: E, 2075 token: Token | None = None, 2076 comments: list[str] | None = None, 2077 ) -> E: 2078 if token: 2079 instance.update_positions(token) 2080 instance.add_comments(comments) if comments else self._add_comments(instance) 2081 if not instance.is_primitive: 2082 instance = self.validate_expression(instance) 2083 return instance 2084 2085 def _parse_batch_statements( 2086 self, 2087 parse_method: t.Callable[[Parser], exp.Expr | None], 2088 sep_first_statement: bool = True, 2089 ) -> list[exp.Expr | None]: 2090 expressions = [] 2091 2092 # Chunkification binds if/while statements with the first statement of the body 2093 if sep_first_statement: 2094 self._match(TokenType.BEGIN) 2095 expressions.append(parse_method(self)) 2096 2097 chunks_length = len(self._chunks) 2098 while self._chunk_index < chunks_length: 2099 self._advance_chunk() 2100 2101 if self._match(TokenType.ELSE, advance=False): 2102 return expressions 2103 2104 if expressions and not self._next and self._match(TokenType.END): 2105 expressions.append(exp.EndStatement()) 2106 continue 2107 2108 expressions.append(parse_method(self)) 2109 2110 if self._index < self._tokens_size: 2111 self.raise_error("Invalid expression / Unexpected token") 2112 2113 self.check_errors() 2114 2115 return expressions 2116 2117 def _parse( 2118 self, 2119 parse_method: t.Callable[[Parser], exp.Expr | None], 2120 raw_tokens: list[Token], 2121 sql: str | None = None, 2122 ) -> list[exp.Expr | None]: 2123 self.reset() 2124 self.sql = sql or "" 2125 2126 total = len(raw_tokens) 2127 chunks: list[list[Token]] = [[]] 2128 2129 for i, token in enumerate(raw_tokens): 2130 if token.token_type == TokenType.SEMICOLON: 2131 if token.comments: 2132 chunks.append([token]) 2133 2134 if i < total - 1: 2135 chunks.append([]) 2136 else: 2137 chunks[-1].append(token) 2138 2139 self._chunks = chunks 2140 2141 return self._parse_batch_statements(parse_method=parse_method, sep_first_statement=False) 2142 2143 def _warn_unsupported(self) -> None: 2144 if self._tokens_size <= 1: 2145 return 2146 2147 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 2148 # interested in emitting a warning for the one being currently processed. 2149 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 2150 2151 logger.warning( 2152 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 2153 ) 2154 2155 def _parse_command(self) -> exp.Command: 2156 self._warn_unsupported() 2157 comments = self._prev_comments 2158 return self.expression( 2159 exp.Command(this=self._prev.text.upper(), expression=self._parse_string()), 2160 comments=comments, 2161 ) 2162 2163 def _parse_comment(self, allow_exists: bool = True) -> exp.Expr: 2164 start = self._prev 2165 exists = self._parse_exists() if allow_exists else None 2166 2167 self._match(TokenType.ON) 2168 2169 materialized = self._match_text_seq("MATERIALIZED") 2170 kind = self._match_set(self.CREATABLES) and self._prev 2171 if not kind: 2172 return self._parse_as_command(start) 2173 2174 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2175 this = self._parse_user_defined_function(kind=kind.token_type) 2176 elif kind.token_type == TokenType.TABLE: 2177 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 2178 elif kind.token_type == TokenType.COLUMN: 2179 this = self._parse_column() 2180 else: 2181 this = self._parse_id_var() 2182 2183 self._match(TokenType.IS) 2184 2185 return self.expression( 2186 exp.Comment( 2187 this=this, 2188 kind=kind.text, 2189 expression=self._parse_string(), 2190 exists=exists, 2191 materialized=materialized, 2192 ) 2193 ) 2194 2195 def _parse_to_table( 2196 self, 2197 ) -> exp.ToTableProperty: 2198 table = self._parse_table_parts(schema=True) 2199 return self.expression(exp.ToTableProperty(this=table)) 2200 2201 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 2202 def _parse_ttl(self) -> exp.Expr: 2203 def _parse_ttl_action() -> exp.Expr | None: 2204 this = self._parse_bitwise() 2205 2206 if self._match_text_seq("DELETE"): 2207 return self.expression(exp.MergeTreeTTLAction(this=this, delete=True)) 2208 if self._match_text_seq("RECOMPRESS"): 2209 return self.expression( 2210 exp.MergeTreeTTLAction(this=this, recompress=self._parse_bitwise()) 2211 ) 2212 if self._match_text_seq("TO", "DISK"): 2213 return self.expression( 2214 exp.MergeTreeTTLAction(this=this, to_disk=self._parse_string()) 2215 ) 2216 if self._match_text_seq("TO", "VOLUME"): 2217 return self.expression( 2218 exp.MergeTreeTTLAction(this=this, to_volume=self._parse_string()) 2219 ) 2220 2221 return this 2222 2223 expressions = self._parse_csv(_parse_ttl_action) 2224 where = self._parse_where() 2225 group = self._parse_group() 2226 2227 aggregates = None 2228 if group and self._match(TokenType.SET): 2229 aggregates = self._parse_csv(self._parse_set_item) 2230 2231 return self.expression( 2232 exp.MergeTreeTTL( 2233 expressions=expressions, where=where, group=group, aggregates=aggregates 2234 ) 2235 ) 2236 2237 def _parse_condition(self) -> exp.Expr | None: 2238 return self._parse_wrapped(parse_method=self._parse_expression, optional=True) 2239 2240 def _parse_block(self) -> exp.Block: 2241 return self.expression( 2242 exp.Block( 2243 expressions=self._parse_batch_statements( 2244 parse_method=lambda self: self._parse_statement() 2245 ) 2246 ) 2247 ) 2248 2249 def _parse_whileblock(self) -> exp.WhileBlock: 2250 return self.expression( 2251 exp.WhileBlock(this=self._parse_condition(), body=self._parse_block()) 2252 ) 2253 2254 def _parse_statement(self) -> exp.Expr | None: 2255 if not self._curr: 2256 return None 2257 2258 if self._match_set(self.STATEMENT_PARSERS): 2259 comments = self._prev_comments 2260 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 2261 stmt.add_comments(comments, prepend=True) 2262 return stmt 2263 2264 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 2265 return self._parse_command() 2266 2267 if self._match_text_seq("WHILE"): 2268 return self._parse_whileblock() 2269 2270 expression = self._parse_expression() 2271 expression = self._parse_set_operations(expression) if expression else self._parse_select() 2272 2273 if isinstance(expression, exp.Subquery) and self._match(TokenType.PIPE_GT, advance=False): 2274 expression = self._parse_pipe_syntax_query(expression) 2275 2276 return self._parse_query_modifiers(expression) 2277 2278 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 2279 start = self._prev 2280 temporary = self._match(TokenType.TEMPORARY) 2281 materialized = self._match_text_seq("MATERIALIZED") 2282 iceberg = self._match_text_seq("ICEBERG") 2283 2284 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 2285 if not kind or (iceberg and kind and kind != "TABLE"): 2286 return self._parse_as_command(start) 2287 2288 concurrently = self._match_text_seq("CONCURRENTLY") 2289 if_exists = exists or self._parse_exists() 2290 2291 if kind == "COLUMN": 2292 this = self._parse_column() 2293 else: 2294 this = self._parse_table_parts(schema=True, is_db_reference=kind == "SCHEMA") 2295 2296 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 2297 2298 if self._match(TokenType.L_PAREN, advance=False): 2299 expressions = self._parse_wrapped_csv(self._parse_types) 2300 else: 2301 expressions = None 2302 2303 cascade_or_restrict = self._match_texts(("CASCADE", "RESTRICT")) and self._prev.text.upper() 2304 2305 return self.expression( 2306 exp.Drop( 2307 exists=if_exists, 2308 this=this, 2309 expressions=expressions, 2310 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 2311 temporary=temporary, 2312 materialized=materialized, 2313 cascade=cascade_or_restrict == "CASCADE", 2314 restrict=cascade_or_restrict == "RESTRICT", 2315 constraints=self._match_text_seq("CONSTRAINTS"), 2316 purge=self._match_text_seq("PURGE"), 2317 cluster=cluster, 2318 concurrently=concurrently, 2319 sync=self._match_text_seq("SYNC"), 2320 iceberg=iceberg, 2321 ) 2322 ) 2323 2324 def _parse_exists(self, not_: bool = False) -> bool | None: 2325 return ( 2326 self._match_text_seq("IF") 2327 and (not not_ or self._match(TokenType.NOT)) 2328 and self._match(TokenType.EXISTS) 2329 ) 2330 2331 def _parse_create(self) -> exp.Create | exp.Command: 2332 # Note: this can't be None because we've matched a statement parser 2333 start = self._prev 2334 2335 replace = ( 2336 start.token_type == TokenType.REPLACE 2337 or self._match_pair(TokenType.OR, TokenType.REPLACE) 2338 or self._match_pair(TokenType.OR, TokenType.ALTER) 2339 ) 2340 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 2341 2342 unique = self._match(TokenType.UNIQUE) 2343 2344 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 2345 clustered = True 2346 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 2347 "COLUMNSTORE" 2348 ): 2349 clustered = False 2350 else: 2351 clustered = None 2352 2353 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2354 self._advance() 2355 2356 properties = None 2357 create_token = self._match_set(self.CREATABLES) and self._prev 2358 2359 if not create_token: 2360 # exp.Properties.Location.POST_CREATE 2361 properties = self._parse_properties() 2362 create_token = self._match_set(self.CREATABLES) and self._prev 2363 2364 if not properties or not create_token: 2365 return self._parse_as_command(start) 2366 2367 create_token_type = t.cast(Token, create_token).token_type 2368 2369 concurrently = self._match_text_seq("CONCURRENTLY") 2370 exists = self._parse_exists(not_=True) 2371 this = None 2372 expression: exp.Expr | None = None 2373 indexes = None 2374 no_schema_binding = None 2375 begin = None 2376 clone = None 2377 2378 def extend_props(temp_props: exp.Properties | None) -> None: 2379 nonlocal properties 2380 if properties and temp_props: 2381 properties.expressions.extend(temp_props.expressions) 2382 elif temp_props: 2383 properties = temp_props 2384 2385 if create_token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2386 this = self._parse_user_defined_function(kind=create_token_type) 2387 2388 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 2389 extend_props(self._parse_properties()) 2390 2391 expression = self._parse_heredoc() if self._match(TokenType.ALIAS) else None 2392 extend_props(self._parse_function_properties()) 2393 2394 if not expression: 2395 if self._match(TokenType.COMMAND): 2396 expression = self._parse_as_command(self._prev) 2397 else: 2398 begin = self._match(TokenType.BEGIN) 2399 return_ = self._match_text_seq("RETURN") 2400 2401 if self._match(TokenType.STRING, advance=False): 2402 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2403 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2404 expression = self._parse_string() 2405 extend_props(self._parse_properties()) 2406 else: 2407 expression = ( 2408 self._parse_user_defined_function_expression() 2409 if create_token_type == TokenType.FUNCTION 2410 else self._parse_block() 2411 ) 2412 2413 if return_: 2414 expression = self.expression(exp.Return(this=expression)) 2415 elif create_token_type == TokenType.INDEX: 2416 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2417 if not self._match(TokenType.ON): 2418 index = self._parse_id_var() 2419 anonymous = False 2420 else: 2421 index = None 2422 anonymous = True 2423 2424 this = self._parse_index(index=index, anonymous=anonymous) 2425 elif ( 2426 create_token_type == TokenType.CONSTRAINT and self._match(TokenType.TRIGGER) 2427 ) or create_token_type == TokenType.TRIGGER: 2428 if is_constraint := (create_token_type == TokenType.CONSTRAINT): 2429 create_token = self._prev 2430 2431 trigger_name = self._parse_id_var() 2432 if not trigger_name: 2433 return self._parse_as_command(start) 2434 2435 timing_var = self._parse_var_from_options(self.TRIGGER_TIMING, raise_unmatched=False) 2436 timing = timing_var.this if timing_var else None 2437 if not timing: 2438 return self._parse_as_command(start) 2439 2440 events = self._parse_trigger_events() 2441 if not self._match(TokenType.ON): 2442 self.raise_error("Expected ON in trigger definition") 2443 2444 table = self._parse_table_parts() 2445 referenced_table = self._parse_table_parts() if self._match(TokenType.FROM) else None 2446 deferrable, initially = self._parse_trigger_deferrable() 2447 referencing = self._parse_trigger_referencing() 2448 for_each = self._parse_trigger_for_each() 2449 when = self._match_text_seq("WHEN") and self._parse_wrapped( 2450 self._parse_disjunction, optional=True 2451 ) 2452 execute = self._parse_trigger_execute() 2453 2454 if execute is None: 2455 return self._parse_as_command(start) 2456 2457 trigger_props = self.expression( 2458 exp.TriggerProperties( 2459 table=table, 2460 timing=timing, 2461 events=events, 2462 execute=execute, 2463 constraint=is_constraint, 2464 referenced_table=referenced_table, 2465 deferrable=deferrable, 2466 initially=initially, 2467 referencing=referencing, 2468 for_each=for_each, 2469 when=when, 2470 ) 2471 ) 2472 2473 this = trigger_name 2474 extend_props(exp.Properties(expressions=[trigger_props] if trigger_props else [])) 2475 elif create_token_type in self.DB_CREATABLES: 2476 table_parts = self._parse_table_parts( 2477 schema=True, is_db_reference=create_token_type == TokenType.SCHEMA 2478 ) 2479 2480 # exp.Properties.Location.POST_NAME 2481 self._match(TokenType.COMMA) 2482 extend_props(self._parse_properties(before=True)) 2483 2484 this = self._parse_schema(this=table_parts) 2485 2486 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2487 extend_props(self._parse_properties()) 2488 2489 has_alias = self._match(TokenType.ALIAS) 2490 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2491 # exp.Properties.Location.POST_ALIAS 2492 extend_props(self._parse_properties()) 2493 2494 if create_token_type == TokenType.SEQUENCE: 2495 expression = self._parse_types() 2496 props = self._parse_properties() 2497 if props: 2498 sequence_props = exp.SequenceProperties() 2499 options = [] 2500 for prop in props: 2501 if isinstance(prop, exp.SequenceProperties): 2502 for arg, value in prop.args.items(): 2503 if arg == "options": 2504 options.extend(value) 2505 else: 2506 sequence_props.set(arg, value) 2507 prop.pop() 2508 2509 if options: 2510 sequence_props.set("options", options) 2511 2512 props.append("expressions", sequence_props) 2513 extend_props(props) 2514 else: 2515 expression = self._parse_ddl_select() 2516 2517 # Some dialects also support using a table as an alias instead of a SELECT. 2518 # Here we fallback to this as an alternative. 2519 if not expression and has_alias: 2520 expression = self._try_parse(self._parse_table_parts) 2521 2522 if create_token_type == TokenType.TABLE: 2523 # exp.Properties.Location.POST_EXPRESSION 2524 extend_props(self._parse_properties()) 2525 2526 indexes = [] 2527 while True: 2528 index = self._parse_index() 2529 2530 # exp.Properties.Location.POST_INDEX 2531 extend_props(self._parse_properties()) 2532 if not index: 2533 break 2534 else: 2535 self._match(TokenType.COMMA) 2536 indexes.append(index) 2537 elif create_token_type == TokenType.VIEW: 2538 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2539 no_schema_binding = True 2540 elif create_token_type in (TokenType.SINK, TokenType.SOURCE): 2541 extend_props(self._parse_properties()) 2542 2543 shallow = self._match_text_seq("SHALLOW") 2544 2545 if self._match_texts(self.CLONE_KEYWORDS): 2546 copy = self._prev.text.lower() == "copy" 2547 clone = self.expression( 2548 exp.Clone(this=self._parse_table(schema=True), shallow=shallow, copy=copy) 2549 ) 2550 2551 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2552 return self._parse_as_command(start) 2553 2554 create_kind_text = create_token.text.upper() 2555 return self.expression( 2556 exp.Create( 2557 this=this, 2558 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2559 replace=replace, 2560 refresh=refresh, 2561 unique=unique, 2562 expression=expression, 2563 exists=exists, 2564 properties=properties, 2565 indexes=indexes, 2566 no_schema_binding=no_schema_binding, 2567 begin=begin, 2568 clone=clone, 2569 concurrently=concurrently, 2570 clustered=clustered, 2571 ) 2572 ) 2573 2574 def _parse_sequence_properties(self) -> exp.SequenceProperties | None: 2575 seq = exp.SequenceProperties() 2576 2577 options = [] 2578 index = self._index 2579 2580 while self._curr: 2581 self._match(TokenType.COMMA) 2582 if self._match_text_seq("INCREMENT"): 2583 self._match_text_seq("BY") 2584 self._match_text_seq("=") 2585 seq.set("increment", self._parse_term()) 2586 elif self._match_text_seq("MINVALUE"): 2587 seq.set("minvalue", self._parse_term()) 2588 elif self._match_text_seq("MAXVALUE"): 2589 seq.set("maxvalue", self._parse_term()) 2590 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2591 self._match_text_seq("=") 2592 seq.set("start", self._parse_term()) 2593 elif self._match_text_seq("CACHE"): 2594 # T-SQL allows empty CACHE which is initialized dynamically 2595 seq.set("cache", self._parse_number() or True) 2596 elif self._match_text_seq("OWNED", "BY"): 2597 # "OWNED BY NONE" is the default 2598 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2599 else: 2600 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2601 if opt: 2602 options.append(opt) 2603 else: 2604 break 2605 2606 seq.set("options", options if options else None) 2607 return None if self._index == index else seq 2608 2609 def _parse_trigger_events(self) -> list[exp.TriggerEvent]: 2610 events = [] 2611 2612 while True: 2613 event_type = self._match_set(self.TRIGGER_EVENTS) and self._prev.text.upper() 2614 2615 if not event_type: 2616 self.raise_error("Expected trigger event (INSERT, UPDATE, DELETE, TRUNCATE)") 2617 2618 columns = ( 2619 self._parse_csv(self._parse_column) 2620 if event_type == "UPDATE" and self._match_text_seq("OF") 2621 else None 2622 ) 2623 2624 events.append(self.expression(exp.TriggerEvent(this=event_type, columns=columns))) 2625 2626 if not self._match(TokenType.OR): 2627 break 2628 2629 return events 2630 2631 def _parse_trigger_deferrable( 2632 self, 2633 ) -> tuple[str | None, str | None]: 2634 deferrable_var = self._parse_var_from_options( 2635 self.TRIGGER_DEFERRABLE, raise_unmatched=False 2636 ) 2637 deferrable = deferrable_var.this if deferrable_var else None 2638 2639 initially = None 2640 if deferrable and self._match_text_seq("INITIALLY"): 2641 initially = ( 2642 self._prev.text.upper() if self._match_texts(("IMMEDIATE", "DEFERRED")) else None 2643 ) 2644 2645 return deferrable, initially 2646 2647 def _parse_trigger_referencing_clause(self, keyword: str) -> exp.Expr | None: 2648 if not self._match_text_seq(keyword): 2649 return None 2650 if not self._match_text_seq("TABLE"): 2651 self.raise_error(f"Expected TABLE after {keyword} in REFERENCING clause") 2652 self._match_text_seq("AS") 2653 return self._parse_id_var() 2654 2655 def _parse_trigger_referencing(self) -> exp.TriggerReferencing | None: 2656 if not self._match_text_seq("REFERENCING"): 2657 return None 2658 2659 old_alias = None 2660 new_alias = None 2661 2662 while True: 2663 if alias := self._parse_trigger_referencing_clause("OLD"): 2664 if old_alias is not None: 2665 self.raise_error("Duplicate OLD clause in REFERENCING") 2666 old_alias = alias 2667 elif alias := self._parse_trigger_referencing_clause("NEW"): 2668 if new_alias is not None: 2669 self.raise_error("Duplicate NEW clause in REFERENCING") 2670 new_alias = alias 2671 else: 2672 break 2673 2674 if old_alias is None and new_alias is None: 2675 self.raise_error("REFERENCING clause requires at least OLD TABLE or NEW TABLE") 2676 2677 return self.expression(exp.TriggerReferencing(old=old_alias, new=new_alias)) 2678 2679 def _parse_trigger_for_each(self) -> str | None: 2680 if not self._match_text_seq("FOR", "EACH"): 2681 return None 2682 2683 return self._prev.text.upper() if self._match_texts(("ROW", "STATEMENT")) else None 2684 2685 def _parse_trigger_execute(self) -> exp.TriggerExecute | None: 2686 if not self._match(TokenType.EXECUTE): 2687 return None 2688 2689 if not self._match_set((TokenType.FUNCTION, TokenType.PROCEDURE)): 2690 self.raise_error("Expected FUNCTION or PROCEDURE after EXECUTE") 2691 2692 func_call = self._parse_column() 2693 return self.expression(exp.TriggerExecute(this=func_call)) 2694 2695 def _parse_property_before(self) -> exp.Expr | list[exp.Expr] | None: 2696 # only used for teradata currently 2697 self._match(TokenType.COMMA) 2698 2699 kwargs = { 2700 "no": self._match_text_seq("NO"), 2701 "dual": self._match_text_seq("DUAL"), 2702 "before": self._match_text_seq("BEFORE"), 2703 "default": self._match_text_seq("DEFAULT"), 2704 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2705 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2706 "after": self._match_text_seq("AFTER"), 2707 "minimum": self._match_texts(("MIN", "MINIMUM")), 2708 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2709 } 2710 2711 if self._match_texts(self.PROPERTY_PARSERS): 2712 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2713 try: 2714 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2715 except TypeError: 2716 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2717 2718 return None 2719 2720 def _parse_wrapped_properties(self) -> list[exp.Expr | list[exp.Expr]]: 2721 return self._parse_wrapped_csv(self._parse_property) 2722 2723 def _parse_property(self) -> exp.Expr | list[exp.Expr] | None: 2724 if self._match_texts(self.PROPERTY_PARSERS): 2725 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2726 2727 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2728 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2729 2730 if self._match_text_seq("COMPOUND", "SORTKEY"): 2731 return self._parse_sortkey(compound=True) 2732 2733 if self._match_text_seq("PARAMETER", "STYLE", "PANDAS"): 2734 return self.expression(exp.ParameterStyleProperty(this="PANDAS")) 2735 2736 index = self._index 2737 2738 seq_props = self._parse_sequence_properties() 2739 if seq_props: 2740 return seq_props 2741 2742 self._retreat(index) 2743 key = self._parse_column() 2744 2745 if not self._match(TokenType.EQ): 2746 self._retreat(index) 2747 return None 2748 2749 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2750 if isinstance(key, exp.Column): 2751 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2752 2753 value = self._parse_bitwise() or self._parse_var(any_token=True) 2754 2755 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2756 if isinstance(value, exp.Column): 2757 value = exp.var(value.name) 2758 2759 return self.expression(exp.Property(this=key, value=value)) 2760 2761 def _parse_stored(self) -> exp.FileFormatProperty | exp.StorageHandlerProperty: 2762 if self._match_text_seq("BY"): 2763 return self.expression(exp.StorageHandlerProperty(this=self._parse_var_or_string())) 2764 2765 self._match(TokenType.ALIAS) 2766 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2767 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2768 2769 return self.expression( 2770 exp.FileFormatProperty( 2771 this=( 2772 self.expression( 2773 exp.InputOutputFormat( 2774 input_format=input_format, output_format=output_format 2775 ) 2776 ) 2777 if input_format or output_format 2778 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2779 ), 2780 hive_format=True, 2781 ) 2782 ) 2783 2784 def _parse_unquoted_field(self) -> exp.Expr | None: 2785 field = self._parse_field() 2786 if isinstance(field, exp.Identifier) and not field.quoted: 2787 field = exp.var(field) 2788 2789 return field 2790 2791 def _parse_property_assignment(self, exp_class: type[E], **kwargs: t.Any) -> E: 2792 self._match(TokenType.EQ) 2793 self._match(TokenType.ALIAS) 2794 2795 return self.expression(exp_class(this=self._parse_unquoted_field(), **kwargs)) 2796 2797 def _parse_properties(self, before: bool | None = None) -> exp.Properties | None: 2798 properties = [] 2799 while True: 2800 if before: 2801 prop = self._parse_property_before() 2802 else: 2803 prop = self._parse_property() 2804 if not prop: 2805 break 2806 for p in ensure_list(prop): 2807 properties.append(p) 2808 2809 if properties: 2810 return self.expression(exp.Properties(expressions=properties)) 2811 2812 return None 2813 2814 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2815 return self.expression( 2816 exp.FallbackProperty(no=no, protection=self._match_text_seq("PROTECTION")) 2817 ) 2818 2819 def _parse_sql_security(self) -> exp.SqlSecurityProperty: 2820 return self.expression( 2821 exp.SqlSecurityProperty( 2822 this=self._match_texts(self.SECURITY_PROPERTY_KEYWORDS) and self._prev.text.upper() 2823 ) 2824 ) 2825 2826 def _parse_settings_property(self) -> exp.SettingsProperty: 2827 return self.expression( 2828 exp.SettingsProperty(expressions=self._parse_csv(self._parse_assignment)) 2829 ) 2830 2831 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2832 if self._index >= 2: 2833 pre_volatile_token = self._tokens[self._index - 2] 2834 else: 2835 pre_volatile_token = None 2836 2837 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2838 return exp.VolatileProperty() 2839 2840 return self.expression(exp.StabilityProperty(this=exp.Literal.string("VOLATILE"))) 2841 2842 def _parse_retention_period(self) -> exp.Var: 2843 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2844 number = self._parse_number() 2845 number_str = f"{number} " if number else "" 2846 unit = self._parse_var(any_token=True) 2847 return exp.var(f"{number_str}{unit}") 2848 2849 def _parse_system_versioning_property( 2850 self, with_: bool = False 2851 ) -> exp.WithSystemVersioningProperty: 2852 self._match(TokenType.EQ) 2853 prop = self.expression(exp.WithSystemVersioningProperty(on=True, with_=with_)) 2854 2855 if self._match_text_seq("OFF"): 2856 prop.set("on", False) 2857 return prop 2858 2859 self._match(TokenType.ON) 2860 if self._match(TokenType.L_PAREN): 2861 while self._curr and not self._match(TokenType.R_PAREN): 2862 if self._match_text_seq("HISTORY_TABLE", "="): 2863 prop.set("this", self._parse_table_parts()) 2864 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2865 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2866 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2867 prop.set("retention_period", self._parse_retention_period()) 2868 2869 self._match(TokenType.COMMA) 2870 2871 return prop 2872 2873 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2874 self._match(TokenType.EQ) 2875 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2876 prop = self.expression(exp.DataDeletionProperty(on=on)) 2877 2878 if self._match(TokenType.L_PAREN): 2879 while self._curr and not self._match(TokenType.R_PAREN): 2880 if self._match_text_seq("FILTER_COLUMN", "="): 2881 prop.set("filter_column", self._parse_column()) 2882 elif self._match_text_seq("RETENTION_PERIOD", "="): 2883 prop.set("retention_period", self._parse_retention_period()) 2884 2885 self._match(TokenType.COMMA) 2886 2887 return prop 2888 2889 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2890 kind = "HASH" 2891 expressions: list[exp.Expr] | None = None 2892 if self._match_text_seq("BY", "HASH"): 2893 expressions = self._parse_wrapped_csv(self._parse_id_var) 2894 elif self._match_text_seq("BY", "RANDOM"): 2895 kind = "RANDOM" 2896 2897 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2898 buckets: exp.Expr | None = None 2899 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2900 buckets = self._parse_number() 2901 2902 return self.expression( 2903 exp.DistributedByProperty( 2904 expressions=expressions, kind=kind, buckets=buckets, order=self._parse_order() 2905 ) 2906 ) 2907 2908 def _parse_composite_key_property(self, expr_type: type[E]) -> E: 2909 self._match_text_seq("KEY") 2910 expressions = self._parse_wrapped_id_vars() 2911 return self.expression(expr_type(expressions=expressions)) 2912 2913 def _parse_with_property(self) -> exp.Expr | None | list[exp.Expr]: 2914 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2915 prop = self._parse_system_versioning_property(with_=True) 2916 self._match_r_paren() 2917 return prop 2918 2919 if self._match(TokenType.L_PAREN, advance=False): 2920 result: list[exp.Expr] = [] 2921 for i in self._parse_wrapped_properties(): 2922 result.extend(i) if isinstance(i, list) else result.append(i) 2923 return result 2924 2925 if self._match_text_seq("JOURNAL"): 2926 return self._parse_withjournaltable() 2927 2928 if self._match_texts(self.VIEW_ATTRIBUTES): 2929 return self.expression(exp.ViewAttributeProperty(this=self._prev.text.upper())) 2930 2931 if self._match_text_seq("DATA"): 2932 return self._parse_withdata(no=False) 2933 elif self._match_text_seq("NO", "DATA"): 2934 return self._parse_withdata(no=True) 2935 2936 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2937 return self._parse_serde_properties(with_=True) 2938 2939 if self._match(TokenType.SCHEMA): 2940 return self.expression( 2941 exp.WithSchemaBindingProperty( 2942 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS) 2943 ) 2944 ) 2945 2946 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2947 return self.expression( 2948 exp.WithProcedureOptions(expressions=self._parse_csv(self._parse_procedure_option)) 2949 ) 2950 2951 if not self._next: 2952 return None 2953 2954 return self._parse_withisolatedloading() 2955 2956 def _parse_procedure_option(self) -> exp.Expr | None: 2957 if self._match_text_seq("EXECUTE", "AS"): 2958 return self.expression( 2959 exp.ExecuteAsProperty( 2960 this=self._parse_var_from_options( 2961 self.EXECUTE_AS_OPTIONS, raise_unmatched=False 2962 ) 2963 or self._parse_string() 2964 ) 2965 ) 2966 2967 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2968 2969 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2970 def _parse_definer(self) -> exp.DefinerProperty | None: 2971 self._match(TokenType.EQ) 2972 2973 user = self._parse_id_var() 2974 self._match(TokenType.PARAMETER) 2975 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2976 2977 if not user or not host: 2978 return None 2979 2980 return exp.DefinerProperty(this=f"{user}@{host}") 2981 2982 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2983 self._match(TokenType.TABLE) 2984 self._match(TokenType.EQ) 2985 return self.expression(exp.WithJournalTableProperty(this=self._parse_table_parts())) 2986 2987 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2988 return self.expression(exp.LogProperty(no=no)) 2989 2990 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2991 return self.expression(exp.JournalProperty(**kwargs)) 2992 2993 def _parse_checksum(self) -> exp.ChecksumProperty: 2994 self._match(TokenType.EQ) 2995 2996 on = None 2997 if self._match(TokenType.ON): 2998 on = True 2999 elif self._match_text_seq("OFF"): 3000 on = False 3001 3002 return self.expression(exp.ChecksumProperty(on=on, default=self._match(TokenType.DEFAULT))) 3003 3004 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 3005 return self.expression( 3006 exp.Cluster( 3007 expressions=( 3008 self._parse_wrapped_csv(self._parse_ordered) 3009 if wrapped 3010 else self._parse_csv(self._parse_ordered) 3011 ) 3012 ) 3013 ) 3014 3015 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 3016 self._match_text_seq("BY") 3017 3018 self._match_l_paren() 3019 expressions = self._parse_csv(self._parse_column) 3020 self._match_r_paren() 3021 3022 if self._match_text_seq("SORTED", "BY"): 3023 self._match_l_paren() 3024 sorted_by = self._parse_csv(self._parse_ordered) 3025 self._match_r_paren() 3026 else: 3027 sorted_by = None 3028 3029 self._match(TokenType.INTO) 3030 buckets = self._parse_number() 3031 self._match_text_seq("BUCKETS") 3032 3033 return self.expression( 3034 exp.ClusteredByProperty(expressions=expressions, sorted_by=sorted_by, buckets=buckets) 3035 ) 3036 3037 def _parse_copy_property(self) -> exp.CopyGrantsProperty | None: 3038 if not self._match_text_seq("GRANTS"): 3039 self._retreat(self._index - 1) 3040 return None 3041 3042 return self.expression(exp.CopyGrantsProperty()) 3043 3044 def _parse_freespace(self) -> exp.FreespaceProperty: 3045 self._match(TokenType.EQ) 3046 return self.expression( 3047 exp.FreespaceProperty(this=self._parse_number(), percent=self._match(TokenType.PERCENT)) 3048 ) 3049 3050 def _parse_mergeblockratio( 3051 self, no: bool = False, default: bool = False 3052 ) -> exp.MergeBlockRatioProperty: 3053 if self._match(TokenType.EQ): 3054 return self.expression( 3055 exp.MergeBlockRatioProperty( 3056 this=self._parse_number(), percent=self._match(TokenType.PERCENT) 3057 ) 3058 ) 3059 3060 return self.expression(exp.MergeBlockRatioProperty(no=no, default=default)) 3061 3062 def _parse_datablocksize( 3063 self, 3064 default: bool | None = None, 3065 minimum: bool | None = None, 3066 maximum: bool | None = None, 3067 ) -> exp.DataBlocksizeProperty: 3068 self._match(TokenType.EQ) 3069 size = self._parse_number() 3070 3071 units = None 3072 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 3073 units = self._prev.text 3074 3075 return self.expression( 3076 exp.DataBlocksizeProperty( 3077 size=size, units=units, default=default, minimum=minimum, maximum=maximum 3078 ) 3079 ) 3080 3081 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 3082 self._match(TokenType.EQ) 3083 always = self._match_text_seq("ALWAYS") 3084 manual = self._match_text_seq("MANUAL") 3085 never = self._match_text_seq("NEVER") 3086 default = self._match_text_seq("DEFAULT") 3087 3088 autotemp = None 3089 if self._match_text_seq("AUTOTEMP"): 3090 autotemp = self._parse_schema() 3091 3092 return self.expression( 3093 exp.BlockCompressionProperty( 3094 always=always, manual=manual, never=never, default=default, autotemp=autotemp 3095 ) 3096 ) 3097 3098 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty | None: 3099 index = self._index 3100 no = self._match_text_seq("NO") 3101 concurrent = self._match_text_seq("CONCURRENT") 3102 3103 if not self._match_text_seq("ISOLATED", "LOADING"): 3104 self._retreat(index) 3105 return None 3106 3107 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 3108 return self.expression( 3109 exp.IsolatedLoadingProperty(no=no, concurrent=concurrent, target=target) 3110 ) 3111 3112 def _parse_locking(self) -> exp.LockingProperty: 3113 if self._match(TokenType.TABLE): 3114 kind = "TABLE" 3115 elif self._match(TokenType.VIEW): 3116 kind = "VIEW" 3117 elif self._match(TokenType.ROW): 3118 kind = "ROW" 3119 elif self._match_text_seq("DATABASE"): 3120 kind = "DATABASE" 3121 else: 3122 kind = None 3123 3124 if kind in ("DATABASE", "TABLE", "VIEW"): 3125 this = self._parse_table_parts() 3126 else: 3127 this = None 3128 3129 if self._match(TokenType.FOR): 3130 for_or_in = "FOR" 3131 elif self._match(TokenType.IN): 3132 for_or_in = "IN" 3133 else: 3134 for_or_in = None 3135 3136 if self._match_text_seq("ACCESS"): 3137 lock_type = "ACCESS" 3138 elif self._match_texts(("EXCL", "EXCLUSIVE")): 3139 lock_type = "EXCLUSIVE" 3140 elif self._match_text_seq("SHARE"): 3141 lock_type = "SHARE" 3142 elif self._match_text_seq("READ"): 3143 lock_type = "READ" 3144 elif self._match_text_seq("WRITE"): 3145 lock_type = "WRITE" 3146 elif self._match_text_seq("CHECKSUM"): 3147 lock_type = "CHECKSUM" 3148 else: 3149 lock_type = None 3150 3151 override = self._match_text_seq("OVERRIDE") 3152 3153 return self.expression( 3154 exp.LockingProperty( 3155 this=this, kind=kind, for_or_in=for_or_in, lock_type=lock_type, override=override 3156 ) 3157 ) 3158 3159 def _parse_partition_by(self) -> list[exp.Expr]: 3160 if self._match(TokenType.PARTITION_BY): 3161 return self._parse_csv(self._parse_disjunction) 3162 return [] 3163 3164 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 3165 def _parse_partition_bound_expr() -> exp.Expr | None: 3166 if self._match_text_seq("MINVALUE"): 3167 return exp.var("MINVALUE") 3168 if self._match_text_seq("MAXVALUE"): 3169 return exp.var("MAXVALUE") 3170 return self._parse_bitwise() 3171 3172 this: exp.Expr | list[exp.Expr] | None = None 3173 expression = None 3174 from_expressions = None 3175 to_expressions = None 3176 3177 if self._match(TokenType.IN): 3178 this = self._parse_wrapped_csv(self._parse_bitwise) 3179 elif self._match(TokenType.FROM): 3180 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 3181 self._match_text_seq("TO") 3182 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 3183 elif self._match_text_seq("WITH", "(", "MODULUS"): 3184 this = self._parse_number() 3185 self._match_text_seq(",", "REMAINDER") 3186 expression = self._parse_number() 3187 self._match_r_paren() 3188 else: 3189 self.raise_error("Failed to parse partition bound spec.") 3190 3191 return self.expression( 3192 exp.PartitionBoundSpec( 3193 this=this, 3194 expression=expression, 3195 from_expressions=from_expressions, 3196 to_expressions=to_expressions, 3197 ) 3198 ) 3199 3200 # https://www.postgresql.org/docs/current/sql-createtable.html 3201 def _parse_partitioned_of(self) -> exp.PartitionedOfProperty | None: 3202 if not self._match_text_seq("OF"): 3203 self._retreat(self._index - 1) 3204 return None 3205 3206 this = self._parse_table(schema=True) 3207 3208 if self._match(TokenType.DEFAULT): 3209 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 3210 elif self._match_text_seq("FOR", "VALUES"): 3211 expression = self._parse_partition_bound_spec() 3212 else: 3213 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 3214 3215 return self.expression(exp.PartitionedOfProperty(this=this, expression=expression)) 3216 3217 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 3218 self._match(TokenType.EQ) 3219 return self.expression( 3220 exp.PartitionedByProperty( 3221 this=self._parse_schema() or self._parse_bracket(self._parse_field()) 3222 ) 3223 ) 3224 3225 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 3226 if self._match_text_seq("AND", "STATISTICS"): 3227 statistics = True 3228 elif self._match_text_seq("AND", "NO", "STATISTICS"): 3229 statistics = False 3230 else: 3231 statistics = None 3232 3233 return self.expression(exp.WithDataProperty(no=no, statistics=statistics)) 3234 3235 def _parse_contains_property(self) -> exp.SqlReadWriteProperty | None: 3236 if self._match_text_seq("SQL"): 3237 return self.expression(exp.SqlReadWriteProperty(this="CONTAINS SQL")) 3238 return None 3239 3240 def _parse_modifies_property(self) -> exp.SqlReadWriteProperty | None: 3241 if self._match_text_seq("SQL", "DATA"): 3242 return self.expression(exp.SqlReadWriteProperty(this="MODIFIES SQL DATA")) 3243 return None 3244 3245 def _parse_no_property(self) -> exp.Expr | None: 3246 if self._match_text_seq("PRIMARY", "INDEX"): 3247 return exp.NoPrimaryIndexProperty() 3248 if self._match_text_seq("SQL"): 3249 return self.expression(exp.SqlReadWriteProperty(this="NO SQL")) 3250 return None 3251 3252 def _parse_on_property(self) -> exp.Expr | None: 3253 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 3254 return exp.OnCommitProperty() 3255 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 3256 return exp.OnCommitProperty(delete=True) 3257 return self.expression(exp.OnProperty(this=self._parse_schema(self._parse_id_var()))) 3258 3259 def _parse_reads_property(self) -> exp.SqlReadWriteProperty | None: 3260 if self._match_text_seq("SQL", "DATA"): 3261 return self.expression(exp.SqlReadWriteProperty(this="READS SQL DATA")) 3262 return None 3263 3264 def _parse_distkey(self) -> exp.DistKeyProperty: 3265 return self.expression(exp.DistKeyProperty(this=self._parse_wrapped(self._parse_id_var))) 3266 3267 def _parse_create_like(self) -> exp.LikeProperty | None: 3268 table = self._parse_table(schema=True) 3269 3270 options = [] 3271 while self._match_texts(("INCLUDING", "EXCLUDING")): 3272 this = self._prev.text.upper() 3273 3274 id_var = self._parse_id_var() 3275 if not id_var: 3276 return None 3277 3278 options.append( 3279 self.expression(exp.Property(this=this, value=exp.var(id_var.this.upper()))) 3280 ) 3281 3282 return self.expression(exp.LikeProperty(this=table, expressions=options)) 3283 3284 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 3285 return self.expression( 3286 exp.SortKeyProperty(this=self._parse_wrapped_id_vars(), compound=compound) 3287 ) 3288 3289 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 3290 self._match(TokenType.EQ) 3291 return self.expression( 3292 exp.CharacterSetProperty(this=self._parse_var_or_string(), default=default) 3293 ) 3294 3295 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 3296 self._match_text_seq("WITH", "CONNECTION") 3297 return self.expression( 3298 exp.RemoteWithConnectionModelProperty(this=self._parse_table_parts()) 3299 ) 3300 3301 def _parse_returns(self) -> exp.ReturnsProperty: 3302 value: exp.Expr | None 3303 null = None 3304 is_table = self._match(TokenType.TABLE) 3305 3306 if is_table: 3307 if self._match(TokenType.LT): 3308 value = self.expression( 3309 exp.Schema(this="TABLE", expressions=self._parse_csv(self._parse_struct_types)) 3310 ) 3311 if not self._match(TokenType.GT): 3312 self.raise_error("Expecting >") 3313 else: 3314 value = self._parse_schema(exp.var("TABLE")) 3315 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 3316 null = True 3317 value = None 3318 else: 3319 value = self._parse_types() 3320 3321 return self.expression(exp.ReturnsProperty(this=value, is_table=is_table, null=null)) 3322 3323 def _parse_describe(self) -> exp.Describe: 3324 kind = self._prev.text if self._match_set(self.CREATABLES) else None 3325 style: str | None = ( 3326 self._prev.text.upper() if self._match_texts(self.DESCRIBE_STYLES) else None 3327 ) 3328 if self._match(TokenType.DOT): 3329 style = None 3330 self._retreat(self._index - 2) 3331 3332 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 3333 3334 if self._match_set(self.STATEMENT_PARSERS, advance=False): 3335 this = self._parse_statement() 3336 else: 3337 this = self._parse_table(schema=True) 3338 3339 properties = self._parse_properties() 3340 expressions = properties.expressions if properties else None 3341 partition = self._parse_partition() 3342 return self.expression( 3343 exp.Describe( 3344 this=this, 3345 style=style, 3346 kind=kind, 3347 expressions=expressions, 3348 partition=partition, 3349 format=format, 3350 as_json=self._match_text_seq("AS", "JSON"), 3351 ) 3352 ) 3353 3354 def _parse_multitable_inserts(self, comments: list[str] | None) -> exp.MultitableInserts: 3355 kind = self._prev.text.upper() 3356 expressions = [] 3357 3358 def parse_conditional_insert() -> exp.ConditionalInsert | None: 3359 if self._match(TokenType.WHEN): 3360 expression = self._parse_disjunction() 3361 self._match(TokenType.THEN) 3362 else: 3363 expression = None 3364 3365 else_ = self._match(TokenType.ELSE) 3366 3367 if not self._match(TokenType.INTO): 3368 return None 3369 3370 return self.expression( 3371 exp.ConditionalInsert( 3372 this=self.expression( 3373 exp.Insert( 3374 this=self._parse_table(schema=True), 3375 expression=self._parse_derived_table_values(), 3376 ) 3377 ), 3378 expression=expression, 3379 else_=else_, 3380 ) 3381 ) 3382 3383 expression = parse_conditional_insert() 3384 while expression is not None: 3385 expressions.append(expression) 3386 expression = parse_conditional_insert() 3387 3388 return self.expression( 3389 exp.MultitableInserts(kind=kind, expressions=expressions, source=self._parse_table()), 3390 comments=comments, 3391 ) 3392 3393 def _parse_insert(self) -> exp.Insert | exp.MultitableInserts: 3394 comments: list[str] = [] 3395 hint = self._parse_hint() 3396 overwrite = self._match(TokenType.OVERWRITE) 3397 ignore = self._match(TokenType.IGNORE) 3398 local = self._match_text_seq("LOCAL") 3399 alternative = None 3400 is_function = None 3401 3402 if self._match_text_seq("DIRECTORY"): 3403 this: exp.Expr | None = self.expression( 3404 exp.Directory( 3405 this=self._parse_var_or_string(), 3406 local=local, 3407 row_format=self._parse_row_format(match_row=True), 3408 ) 3409 ) 3410 else: 3411 if self._match_set((TokenType.FIRST, TokenType.ALL)): 3412 comments += ensure_list(self._prev_comments) 3413 return self._parse_multitable_inserts(comments) 3414 3415 if self._match(TokenType.OR): 3416 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 3417 3418 self._match(TokenType.INTO) 3419 comments += ensure_list(self._prev_comments) 3420 self._match(TokenType.TABLE) 3421 is_function = self._match(TokenType.FUNCTION) 3422 3423 this = self._parse_function() if is_function else self._parse_insert_table() 3424 3425 returning = self._parse_returning() # TSQL allows RETURNING before source 3426 3427 return self.expression( 3428 exp.Insert( 3429 hint=hint, 3430 is_function=is_function, 3431 this=this, 3432 stored=self._match_text_seq("STORED") and self._parse_stored(), 3433 by_name=self._match_text_seq("BY", "NAME"), 3434 exists=self._parse_exists(), 3435 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 3436 and self._parse_disjunction(), 3437 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 3438 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 3439 default=self._match_text_seq("DEFAULT", "VALUES"), 3440 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 3441 conflict=self._parse_on_conflict(), 3442 returning=returning or self._parse_returning(), 3443 overwrite=overwrite, 3444 alternative=alternative, 3445 ignore=ignore, 3446 source=self._match(TokenType.TABLE) and self._parse_table(), 3447 ), 3448 comments=comments, 3449 ) 3450 3451 def _parse_insert_table(self) -> exp.Expr | None: 3452 this = self._parse_table(schema=True, parse_partition=True) 3453 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 3454 this.set("alias", self._parse_table_alias()) 3455 return this 3456 3457 def _parse_kill(self) -> exp.Kill: 3458 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 3459 3460 return self.expression(exp.Kill(this=self._parse_primary(), kind=kind)) 3461 3462 def _parse_on_conflict(self) -> exp.OnConflict | None: 3463 conflict = self._match_text_seq("ON", "CONFLICT") 3464 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 3465 3466 if not conflict and not duplicate: 3467 return None 3468 3469 conflict_keys = None 3470 constraint = None 3471 3472 if conflict: 3473 if self._match_text_seq("ON", "CONSTRAINT"): 3474 constraint = self._parse_id_var() 3475 elif self._match(TokenType.L_PAREN): 3476 conflict_keys = self._parse_csv(self._parse_id_var) 3477 self._match_r_paren() 3478 3479 index_predicate = self._parse_where() 3480 3481 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 3482 if self._prev.token_type == TokenType.UPDATE: 3483 self._match(TokenType.SET) 3484 expressions = self._parse_csv(self._parse_equality) 3485 else: 3486 expressions = None 3487 3488 return self.expression( 3489 exp.OnConflict( 3490 duplicate=duplicate, 3491 expressions=expressions, 3492 action=action, 3493 conflict_keys=conflict_keys, 3494 index_predicate=index_predicate, 3495 constraint=constraint, 3496 where=self._parse_where(), 3497 ) 3498 ) 3499 3500 def _parse_returning(self) -> exp.Returning | None: 3501 if not self._match(TokenType.RETURNING): 3502 return None 3503 return self.expression( 3504 exp.Returning( 3505 expressions=self._parse_csv(self._parse_expression), 3506 into=self._match(TokenType.INTO) and self._parse_table_part(), 3507 ) 3508 ) 3509 3510 def _parse_row(self) -> exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty | None: 3511 if not self._match(TokenType.FORMAT): 3512 return None 3513 return self._parse_row_format() 3514 3515 def _parse_serde_properties(self, with_: bool = False) -> exp.SerdeProperties | None: 3516 index = self._index 3517 with_ = with_ or self._match_text_seq("WITH") 3518 3519 if not self._match(TokenType.SERDE_PROPERTIES): 3520 self._retreat(index) 3521 return None 3522 return self.expression( 3523 exp.SerdeProperties(expressions=self._parse_wrapped_properties(), with_=with_) 3524 ) 3525 3526 def _parse_row_format( 3527 self, match_row: bool = False 3528 ) -> exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty | None: 3529 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3530 return None 3531 3532 if self._match_text_seq("SERDE"): 3533 this = self._parse_string() 3534 3535 serde_properties = self._parse_serde_properties() 3536 3537 return self.expression( 3538 exp.RowFormatSerdeProperty(this=this, serde_properties=serde_properties) 3539 ) 3540 3541 self._match_text_seq("DELIMITED") 3542 3543 kwargs = {} 3544 3545 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3546 kwargs["fields"] = self._parse_string() 3547 if self._match_text_seq("ESCAPED", "BY"): 3548 kwargs["escaped"] = self._parse_string() 3549 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3550 kwargs["collection_items"] = self._parse_string() 3551 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3552 kwargs["map_keys"] = self._parse_string() 3553 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3554 kwargs["lines"] = self._parse_string() 3555 if self._match_text_seq("NULL", "DEFINED", "AS"): 3556 kwargs["null"] = self._parse_string() 3557 3558 return self.expression(exp.RowFormatDelimitedProperty(**kwargs)) # type: ignore 3559 3560 def _parse_load(self) -> exp.LoadData | exp.Command: 3561 if self._match_text_seq("DATA"): 3562 local = self._match_text_seq("LOCAL") 3563 self._match_text_seq("INPATH") 3564 inpath = self._parse_string() 3565 overwrite = self._match(TokenType.OVERWRITE) 3566 self._match_pair(TokenType.INTO, TokenType.TABLE) 3567 3568 return self.expression( 3569 exp.LoadData( 3570 this=self._parse_table(schema=True), 3571 local=local, 3572 overwrite=overwrite, 3573 inpath=inpath, 3574 files=self._match_text_seq("FROM", "FILES") 3575 and exp.Properties(expressions=self._parse_wrapped_properties()), 3576 partition=self._parse_partition(), 3577 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3578 serde=self._match_text_seq("SERDE") and self._parse_string(), 3579 ) 3580 ) 3581 return self._parse_as_command(self._prev) 3582 3583 def _parse_delete(self) -> exp.Delete: 3584 hint = self._parse_hint() 3585 3586 # This handles MySQL's "Multiple-Table Syntax" 3587 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3588 tables = None 3589 if not self._match(TokenType.FROM, advance=False): 3590 tables = self._parse_csv(self._parse_table) or None 3591 3592 returning = self._parse_returning() 3593 3594 return self.expression( 3595 exp.Delete( 3596 hint=hint, 3597 tables=tables, 3598 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3599 using=self._match(TokenType.USING) 3600 and self._parse_csv(lambda: self._parse_table(joins=True)), 3601 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3602 where=self._parse_where(), 3603 returning=returning or self._parse_returning(), 3604 order=self._parse_order(), 3605 limit=self._parse_limit(), 3606 ) 3607 ) 3608 3609 def _parse_update(self) -> exp.Update: 3610 hint = self._parse_hint() 3611 kwargs: dict[str, object] = { 3612 "hint": hint, 3613 "this": self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS), 3614 } 3615 while self._curr: 3616 if self._match(TokenType.SET): 3617 kwargs["expressions"] = self._parse_csv(self._parse_equality) 3618 elif self._match(TokenType.RETURNING, advance=False): 3619 kwargs["returning"] = self._parse_returning() 3620 elif self._match(TokenType.FROM, advance=False): 3621 from_ = self._parse_from(joins=True) 3622 table = from_.this if from_ else None 3623 if isinstance(table, exp.Subquery) and self._match(TokenType.JOIN, advance=False): 3624 table.set("joins", list(self._parse_joins()) or None) 3625 3626 kwargs["from_"] = from_ 3627 elif self._match(TokenType.WHERE, advance=False): 3628 kwargs["where"] = self._parse_where() 3629 elif self._match(TokenType.ORDER_BY, advance=False): 3630 kwargs["order"] = self._parse_order() 3631 elif self._match(TokenType.LIMIT, advance=False): 3632 kwargs["limit"] = self._parse_limit() 3633 else: 3634 break 3635 3636 return self.expression(exp.Update(**kwargs)) 3637 3638 def _parse_use(self) -> exp.Use: 3639 return self.expression( 3640 exp.Use( 3641 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3642 this=self._parse_table(schema=False), 3643 ) 3644 ) 3645 3646 def _parse_uncache(self) -> exp.Uncache: 3647 if not self._match(TokenType.TABLE): 3648 self.raise_error("Expecting TABLE after UNCACHE") 3649 3650 return self.expression( 3651 exp.Uncache(exists=self._parse_exists(), this=self._parse_table(schema=True)) 3652 ) 3653 3654 def _parse_cache(self) -> exp.Cache: 3655 lazy = self._match_text_seq("LAZY") 3656 self._match(TokenType.TABLE) 3657 table = self._parse_table(schema=True) 3658 3659 options = [] 3660 if self._match_text_seq("OPTIONS"): 3661 self._match_l_paren() 3662 k = self._parse_string() 3663 self._match(TokenType.EQ) 3664 v = self._parse_string() 3665 options = [k, v] 3666 self._match_r_paren() 3667 3668 self._match(TokenType.ALIAS) 3669 return self.expression( 3670 exp.Cache( 3671 this=table, lazy=lazy, options=options, expression=self._parse_select(nested=True) 3672 ) 3673 ) 3674 3675 def _parse_partition(self) -> exp.Partition | None: 3676 if not self._match_texts(self.PARTITION_KEYWORDS): 3677 return None 3678 3679 return self.expression( 3680 exp.Partition( 3681 subpartition=self._prev.text.upper() == "SUBPARTITION", 3682 expressions=self._parse_wrapped_csv(self._parse_disjunction), 3683 ) 3684 ) 3685 3686 def _parse_value(self, values: bool = True) -> exp.Tuple | None: 3687 def _parse_value_expression() -> exp.Expr | None: 3688 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3689 return exp.var(self._prev.text.upper()) 3690 return self._parse_expression() 3691 3692 if self._match(TokenType.L_PAREN): 3693 expressions = self._parse_csv(_parse_value_expression) 3694 self._match_r_paren() 3695 return self.expression(exp.Tuple(expressions=expressions)) 3696 3697 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3698 expression = self._parse_expression() 3699 if expression: 3700 return self.expression(exp.Tuple(expressions=[expression])) 3701 return None 3702 3703 def _parse_projections( 3704 self, 3705 ) -> tuple[list[exp.Expr], list[exp.Expr] | None]: 3706 return self._parse_expressions(), None 3707 3708 def _parse_wrapped_select(self, table: bool = False) -> exp.Expr | None: 3709 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3710 this: exp.Expr | None = self._parse_simplified_pivot( 3711 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3712 ) 3713 elif self._match(TokenType.FROM): 3714 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3715 # Support parentheses for duckdb FROM-first syntax 3716 select = self._parse_select(from_=from_) 3717 if select: 3718 if not select.args.get("from_"): 3719 select.set("from_", from_) 3720 this = select 3721 else: 3722 this = exp.select("*").from_(t.cast(exp.From, from_)) 3723 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3724 else: 3725 this = ( 3726 self._parse_table(consume_pipe=True) 3727 if table 3728 else self._parse_select(nested=True, parse_set_operation=False) 3729 ) 3730 3731 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3732 # in case a modifier (e.g. join) is following 3733 if table and isinstance(this, exp.Values) and this.alias: 3734 alias = this.args["alias"].pop() 3735 this = exp.Table(this=this, alias=alias) 3736 3737 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3738 3739 return this 3740 3741 def _parse_select( 3742 self, 3743 nested: bool = False, 3744 table: bool = False, 3745 parse_subquery_alias: bool = True, 3746 parse_set_operation: bool = True, 3747 consume_pipe: bool = True, 3748 from_: exp.From | None = None, 3749 ) -> exp.Expr | None: 3750 query = self._parse_select_query( 3751 nested=nested, 3752 table=table, 3753 parse_subquery_alias=parse_subquery_alias, 3754 parse_set_operation=parse_set_operation, 3755 ) 3756 3757 if consume_pipe and self._match(TokenType.PIPE_GT, advance=False): 3758 if not query and from_: 3759 query = exp.select("*").from_(from_) 3760 if isinstance(query, exp.Query): 3761 query = self._parse_pipe_syntax_query(query) 3762 query = query.subquery(copy=False) if query and table else query 3763 3764 return query 3765 3766 def _parse_select_query( 3767 self, 3768 nested: bool = False, 3769 table: bool = False, 3770 parse_subquery_alias: bool = True, 3771 parse_set_operation: bool = True, 3772 ) -> exp.Expr | None: 3773 cte = self._parse_with() 3774 3775 if cte: 3776 this = self._parse_statement() 3777 3778 if not this: 3779 self.raise_error("Failed to parse any statement following CTE") 3780 return cte 3781 3782 while isinstance(this, exp.Subquery) and this.is_wrapper: 3783 this = this.this 3784 3785 assert this is not None 3786 if "with_" in this.arg_types: 3787 this.set("with_", cte) 3788 else: 3789 self.raise_error(f"{this.key} does not support CTE") 3790 this = cte 3791 3792 return this 3793 3794 # duckdb supports leading with FROM x 3795 from_ = ( 3796 self._parse_from(joins=True, consume_pipe=True) 3797 if self._match(TokenType.FROM, advance=False) 3798 else None 3799 ) 3800 3801 if self._match(TokenType.SELECT): 3802 comments = self._prev_comments 3803 3804 hint = self._parse_hint() 3805 3806 if self._next and not self._next.token_type == TokenType.DOT: 3807 all_ = self._match(TokenType.ALL) 3808 matched_distinct = self._match_set(self.DISTINCT_TOKENS) 3809 else: 3810 all_, matched_distinct = None, False 3811 3812 kind = ( 3813 self._prev.text.upper() 3814 if self._match(TokenType.ALIAS) and self._match_texts(("STRUCT", "VALUE")) 3815 else None 3816 ) 3817 3818 distinct: exp.Expr | None = ( 3819 self.expression( 3820 exp.Distinct( 3821 on=self._parse_value(values=False) if self._match(TokenType.ON) else None 3822 ) 3823 ) 3824 if matched_distinct 3825 else None 3826 ) 3827 3828 if all_ and distinct: 3829 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3830 3831 operation_modifiers = [] 3832 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3833 operation_modifiers.append(exp.var(self._prev.text.upper())) 3834 3835 limit = self._parse_limit(top=True) 3836 projections, exclude = self._parse_projections() 3837 3838 this = self.expression( 3839 exp.Select( 3840 kind=kind, 3841 hint=hint, 3842 distinct=distinct, 3843 expressions=projections, 3844 limit=limit, 3845 exclude=exclude, 3846 operation_modifiers=operation_modifiers or None, 3847 ) 3848 ) 3849 this.comments = comments 3850 3851 into = self._parse_into() 3852 if into: 3853 this.set("into", into) 3854 3855 if not from_: 3856 from_ = self._parse_from() 3857 3858 if from_: 3859 this.set("from_", from_) 3860 3861 this = self._parse_query_modifiers(this) 3862 elif (table or nested) and self._match(TokenType.L_PAREN): 3863 comments = self._prev_comments 3864 this = self._parse_wrapped_select(table=table) 3865 3866 if this: 3867 this.add_comments(comments, prepend=True) 3868 3869 # We return early here so that the UNION isn't attached to the subquery by the 3870 # following call to _parse_set_operations, but instead becomes the parent node 3871 self._match_r_paren() 3872 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3873 elif self._match(TokenType.VALUES, advance=False): 3874 this = self._parse_derived_table_values() 3875 elif from_: 3876 this = exp.select("*").from_(from_.this, copy=False) 3877 this = self._parse_query_modifiers(this) 3878 elif self._match(TokenType.SUMMARIZE): 3879 table = self._match(TokenType.TABLE) 3880 this = self._parse_select() or self._parse_string() or self._parse_table() 3881 return self.expression(exp.Summarize(this=this, table=table)) 3882 elif self._match(TokenType.DESCRIBE): 3883 this = self._parse_describe() 3884 else: 3885 this = None 3886 3887 return self._parse_set_operations(this) if parse_set_operation else this 3888 3889 def _parse_recursive_with_search(self) -> exp.RecursiveWithSearch | None: 3890 self._match_text_seq("SEARCH") 3891 3892 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3893 3894 if not kind: 3895 return None 3896 3897 self._match_text_seq("FIRST", "BY") 3898 3899 return self.expression( 3900 exp.RecursiveWithSearch( 3901 kind=kind, 3902 this=self._parse_id_var(), 3903 expression=self._match_text_seq("SET") and self._parse_id_var(), 3904 using=self._match_text_seq("USING") and self._parse_id_var(), 3905 ) 3906 ) 3907 3908 def _parse_with(self, skip_with_token: bool = False) -> exp.With | None: 3909 if not skip_with_token and not self._match(TokenType.WITH): 3910 return None 3911 3912 comments = self._prev_comments 3913 recursive = self._match(TokenType.RECURSIVE) 3914 3915 last_comments = None 3916 expressions = [] 3917 while True: 3918 cte = self._parse_cte() 3919 if isinstance(cte, exp.CTE): 3920 expressions.append(cte) 3921 if last_comments: 3922 cte.add_comments(last_comments) 3923 3924 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3925 break 3926 else: 3927 self._match(TokenType.WITH) 3928 3929 last_comments = self._prev_comments 3930 3931 return self.expression( 3932 exp.With( 3933 expressions=expressions, 3934 recursive=recursive or None, 3935 search=self._parse_recursive_with_search(), 3936 ), 3937 comments=comments, 3938 ) 3939 3940 def _parse_cte(self) -> exp.CTE | None: 3941 index = self._index 3942 3943 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3944 if not alias or not alias.this: 3945 self.raise_error("Expected CTE to have alias") 3946 3947 key_expressions = ( 3948 self._parse_wrapped_id_vars() if self._match_text_seq("USING", "KEY") else None 3949 ) 3950 3951 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3952 self._retreat(index) 3953 return None 3954 3955 comments = self._prev_comments 3956 3957 if self._match_text_seq("NOT", "MATERIALIZED"): 3958 materialized = False 3959 elif self._match_text_seq("MATERIALIZED"): 3960 materialized = True 3961 else: 3962 materialized = None 3963 3964 cte = self.expression( 3965 exp.CTE( 3966 this=self._parse_wrapped(self._parse_statement), 3967 alias=alias, 3968 materialized=materialized, 3969 key_expressions=key_expressions, 3970 ), 3971 comments=comments, 3972 ) 3973 3974 values = cte.this 3975 if isinstance(values, exp.Values): 3976 if values.alias: 3977 cte.set("this", exp.select("*").from_(values)) 3978 else: 3979 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3980 3981 return cte 3982 3983 def _parse_table_alias( 3984 self, alias_tokens: t.Collection[TokenType] | None = None 3985 ) -> exp.TableAlias | None: 3986 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3987 # so this section tries to parse the clause version and if it fails, it treats the token 3988 # as an identifier (alias) 3989 if self._can_parse_limit_or_offset(): 3990 return None 3991 3992 any_token = self._match(TokenType.ALIAS) 3993 alias = ( 3994 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3995 or self._parse_string_as_identifier() 3996 ) 3997 3998 index = self._index 3999 if self._match(TokenType.L_PAREN): 4000 columns = self._parse_csv(self._parse_function_parameter) 4001 self._match_r_paren() if columns else self._retreat(index) 4002 else: 4003 columns = None 4004 4005 if not alias and not columns: 4006 return None 4007 4008 table_alias = self.expression(exp.TableAlias(this=alias, columns=columns)) 4009 4010 # We bubble up comments from the Identifier to the TableAlias 4011 if isinstance(alias, exp.Identifier): 4012 table_alias.add_comments(alias.pop_comments()) 4013 4014 return table_alias 4015 4016 def _parse_subquery( 4017 self, this: exp.Expr | None, parse_alias: bool = True 4018 ) -> exp.Subquery | None: 4019 if not this: 4020 return None 4021 4022 return self.expression( 4023 exp.Subquery( 4024 this=this, 4025 pivots=self._parse_pivots(), 4026 alias=self._parse_table_alias() if parse_alias else None, 4027 sample=self._parse_table_sample(), 4028 ) 4029 ) 4030 4031 def _implicit_unnests_to_explicit(self, this: E) -> E: 4032 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 4033 4034 refs = {_norm(this.args["from_"].this.copy(), dialect=self.dialect).alias_or_name} 4035 for i, join in enumerate(this.args.get("joins") or []): 4036 table = join.this 4037 normalized_table = table.copy() 4038 normalized_table.meta["maybe_column"] = True 4039 normalized_table = _norm(normalized_table, dialect=self.dialect) 4040 4041 if isinstance(table, exp.Table) and not join.args.get("on"): 4042 if normalized_table.parts[0].name in refs: 4043 table_as_column = table.to_column() 4044 unnest = exp.Unnest(expressions=[table_as_column]) 4045 4046 # Table.to_column creates a parent Alias node that we want to convert to 4047 # a TableAlias and attach to the Unnest, so it matches the parser's output 4048 if isinstance(table.args.get("alias"), exp.TableAlias): 4049 table_as_column.replace(table_as_column.this) 4050 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 4051 4052 table.replace(unnest) 4053 4054 refs.add(normalized_table.alias_or_name) 4055 4056 return this 4057 4058 @t.overload 4059 def _parse_query_modifiers(self, this: E) -> E: ... 4060 4061 @t.overload 4062 def _parse_query_modifiers(self, this: None) -> None: ... 4063 4064 def _parse_query_modifiers(self, this): 4065 if isinstance(this, self.MODIFIABLES): 4066 for join in self._parse_joins(): 4067 this.append("joins", join) 4068 for lateral in iter(self._parse_lateral, None): 4069 this.append("laterals", lateral) 4070 4071 while True: 4072 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 4073 modifier_token = self._curr 4074 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 4075 key, expression = parser(self) 4076 4077 if expression: 4078 if this.args.get(key): 4079 self.raise_error( 4080 f"Found multiple '{modifier_token.text.upper()}' clauses", 4081 token=modifier_token, 4082 ) 4083 4084 this.set(key, expression) 4085 if key == "limit": 4086 offset = expression.args.get("offset") 4087 expression.set("offset", None) 4088 4089 if offset: 4090 offset = exp.Offset(expression=offset) 4091 this.set("offset", offset) 4092 4093 limit_by_expressions = expression.expressions 4094 expression.set("expressions", None) 4095 offset.set("expressions", limit_by_expressions) 4096 continue 4097 break 4098 4099 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from_"): 4100 this = self._implicit_unnests_to_explicit(this) 4101 4102 return this 4103 4104 def _parse_hint_fallback_to_string(self) -> exp.Hint | None: 4105 start = self._curr 4106 while self._curr: 4107 self._advance() 4108 4109 end = self._tokens[self._index - 1] 4110 return exp.Hint(expressions=[self._find_sql(start, end)]) 4111 4112 def _parse_hint_function_call(self) -> exp.Expr | None: 4113 return self._parse_function_call() 4114 4115 def _parse_hint_body(self) -> exp.Hint | None: 4116 start_index = self._index 4117 should_fallback_to_string = False 4118 4119 hints = [] 4120 try: 4121 for hint in iter( 4122 lambda: self._parse_csv( 4123 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 4124 ), 4125 [], 4126 ): 4127 hints.extend(hint) 4128 except ParseError: 4129 should_fallback_to_string = True 4130 4131 if should_fallback_to_string or self._curr: 4132 self._retreat(start_index) 4133 return self._parse_hint_fallback_to_string() 4134 4135 return self.expression(exp.Hint(expressions=hints)) 4136 4137 def _parse_hint(self) -> exp.Hint | None: 4138 if self._match(TokenType.HINT) and self._prev_comments: 4139 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 4140 4141 return None 4142 4143 def _parse_into(self) -> exp.Into | None: 4144 if not self._match(TokenType.INTO): 4145 return None 4146 4147 temp = self._match(TokenType.TEMPORARY) 4148 unlogged = self._match_text_seq("UNLOGGED") 4149 self._match(TokenType.TABLE) 4150 4151 return self.expression( 4152 exp.Into(this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged) 4153 ) 4154 4155 def _parse_from( 4156 self, 4157 joins: bool = False, 4158 skip_from_token: bool = False, 4159 consume_pipe: bool = False, 4160 ) -> exp.From | None: 4161 if not skip_from_token and not self._match(TokenType.FROM): 4162 return None 4163 4164 comments = self._prev_comments 4165 return self.expression( 4166 exp.From(this=self._parse_table(joins=joins, consume_pipe=consume_pipe)), 4167 comments=comments, 4168 ) 4169 4170 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 4171 return self.expression( 4172 exp.MatchRecognizeMeasure( 4173 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 4174 this=self._parse_expression(), 4175 ) 4176 ) 4177 4178 def _parse_match_recognize(self) -> exp.MatchRecognize | None: 4179 if not self._match(TokenType.MATCH_RECOGNIZE): 4180 return None 4181 4182 self._match_l_paren() 4183 4184 partition = self._parse_partition_by() 4185 order = self._parse_order() 4186 4187 measures = ( 4188 self._parse_csv(self._parse_match_recognize_measure) 4189 if self._match_text_seq("MEASURES") 4190 else None 4191 ) 4192 4193 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 4194 rows = exp.var("ONE ROW PER MATCH") 4195 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 4196 text = "ALL ROWS PER MATCH" 4197 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 4198 text += " SHOW EMPTY MATCHES" 4199 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 4200 text += " OMIT EMPTY MATCHES" 4201 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 4202 text += " WITH UNMATCHED ROWS" 4203 rows = exp.var(text) 4204 else: 4205 rows = None 4206 4207 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 4208 text = "AFTER MATCH SKIP" 4209 if self._match_text_seq("PAST", "LAST", "ROW"): 4210 text += " PAST LAST ROW" 4211 elif self._match_text_seq("TO", "NEXT", "ROW"): 4212 text += " TO NEXT ROW" 4213 elif self._match_text_seq("TO", "FIRST"): 4214 text += f" TO FIRST {self._advance_any().text}" # type: ignore 4215 elif self._match_text_seq("TO", "LAST"): 4216 text += f" TO LAST {self._advance_any().text}" # type: ignore 4217 after = exp.var(text) 4218 else: 4219 after = None 4220 4221 if self._match_text_seq("PATTERN"): 4222 self._match_l_paren() 4223 4224 if not self._curr: 4225 self.raise_error("Expecting )", self._curr) 4226 4227 paren = 1 4228 start = self._curr 4229 4230 while self._curr and paren > 0: 4231 if self._curr.token_type == TokenType.L_PAREN: 4232 paren += 1 4233 if self._curr.token_type == TokenType.R_PAREN: 4234 paren -= 1 4235 4236 end = self._prev 4237 self._advance() 4238 4239 if paren > 0: 4240 self.raise_error("Expecting )", self._curr) 4241 4242 pattern = exp.var(self._find_sql(start, end)) 4243 else: 4244 pattern = None 4245 4246 define = ( 4247 self._parse_csv(self._parse_name_as_expression) 4248 if self._match_text_seq("DEFINE") 4249 else None 4250 ) 4251 4252 self._match_r_paren() 4253 4254 return self.expression( 4255 exp.MatchRecognize( 4256 partition_by=partition, 4257 order=order, 4258 measures=measures, 4259 rows=rows, 4260 after=after, 4261 pattern=pattern, 4262 define=define, 4263 alias=self._parse_table_alias(), 4264 ) 4265 ) 4266 4267 def _parse_lateral(self) -> exp.Lateral | None: 4268 cross_apply: bool | None = None 4269 if self._match_pair(TokenType.CROSS, TokenType.APPLY): 4270 cross_apply = True 4271 elif self._match_pair(TokenType.OUTER, TokenType.APPLY): 4272 cross_apply = False 4273 4274 if cross_apply is not None: 4275 this = self._parse_select(table=True) 4276 view = None 4277 outer = None 4278 elif self._match(TokenType.LATERAL): 4279 this = self._parse_select(table=True) 4280 view = self._match(TokenType.VIEW) 4281 outer = self._match(TokenType.OUTER) 4282 else: 4283 return None 4284 4285 if not this: 4286 this = ( 4287 self._parse_unnest() 4288 or self._parse_function() 4289 or self._parse_id_var(any_token=False) 4290 ) 4291 4292 while self._match(TokenType.DOT): 4293 this = exp.Dot( 4294 this=this, 4295 expression=self._parse_function() or self._parse_id_var(any_token=False), 4296 ) 4297 4298 ordinality: bool | None = None 4299 4300 if view: 4301 table = self._parse_id_var(any_token=False) 4302 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 4303 table_alias: exp.TableAlias | None = self.expression( 4304 exp.TableAlias(this=table, columns=columns) 4305 ) 4306 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 4307 # We move the alias from the lateral's child node to the lateral itself 4308 table_alias = this.args["alias"].pop() 4309 else: 4310 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4311 table_alias = self._parse_table_alias() 4312 4313 return self.expression( 4314 exp.Lateral( 4315 this=this, 4316 view=view, 4317 outer=outer, 4318 alias=table_alias, 4319 cross_apply=cross_apply, 4320 ordinality=ordinality, 4321 ) 4322 ) 4323 4324 def _parse_stream(self) -> exp.Stream | None: 4325 index = self._index 4326 if self._match(TokenType.STREAM): 4327 if this := self._try_parse(self._parse_table): 4328 return self.expression(exp.Stream(this=this)) 4329 self._retreat(index) 4330 return None 4331 4332 def _parse_join_parts( 4333 self, 4334 ) -> tuple[Token | None, Token | None, Token | None]: 4335 return ( 4336 self._prev if self._match_set(self.JOIN_METHODS) else None, 4337 self._prev if self._match_set(self.JOIN_SIDES) else None, 4338 self._prev if self._match_set(self.JOIN_KINDS) else None, 4339 ) 4340 4341 def _parse_using_identifiers(self) -> list[exp.Expr]: 4342 def _parse_column_as_identifier() -> exp.Expr | None: 4343 this = self._parse_column() 4344 if isinstance(this, exp.Column): 4345 return this.this 4346 return this 4347 4348 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 4349 4350 def _parse_join( 4351 self, skip_join_token: bool = False, parse_bracket: bool = False 4352 ) -> exp.Join | None: 4353 if self._match(TokenType.COMMA): 4354 table = self._try_parse(self._parse_table) 4355 cross_join = self.expression(exp.Join(this=table)) if table else None 4356 4357 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 4358 cross_join.set("kind", "CROSS") 4359 4360 return cross_join 4361 4362 index = self._index 4363 method, side, kind = self._parse_join_parts() 4364 directed = self._match_text_seq("DIRECTED") 4365 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 4366 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 4367 join_comments = self._prev_comments 4368 4369 if not skip_join_token and not join: 4370 self._retreat(index) 4371 kind = None 4372 method = None 4373 side = None 4374 4375 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 4376 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 4377 4378 if not skip_join_token and not join and not outer_apply and not cross_apply: 4379 return None 4380 4381 kwargs: dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 4382 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 4383 kwargs["expressions"] = self._parse_csv( 4384 lambda: self._parse_table(parse_bracket=parse_bracket) 4385 ) 4386 4387 if method: 4388 kwargs["method"] = method.text.upper() 4389 if side: 4390 kwargs["side"] = side.text.upper() 4391 if kind: 4392 kwargs["kind"] = kind.text.upper() 4393 if hint: 4394 kwargs["hint"] = hint 4395 4396 if self._match(TokenType.MATCH_CONDITION): 4397 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 4398 4399 if self._match(TokenType.ON): 4400 kwargs["on"] = self._parse_disjunction() 4401 elif self._match(TokenType.USING): 4402 kwargs["using"] = self._parse_using_identifiers() 4403 elif ( 4404 not method 4405 and not (outer_apply or cross_apply) 4406 and not isinstance(kwargs["this"], exp.Unnest) 4407 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 4408 ): 4409 index = self._index 4410 joins: list | None = list(self._parse_joins()) 4411 4412 if joins and self._match(TokenType.ON): 4413 kwargs["on"] = self._parse_disjunction() 4414 elif joins and self._match(TokenType.USING): 4415 kwargs["using"] = self._parse_using_identifiers() 4416 else: 4417 joins = None 4418 self._retreat(index) 4419 4420 kwargs["this"].set("joins", joins if joins else None) 4421 4422 kwargs["pivots"] = self._parse_pivots() 4423 4424 comments = [c for token in (method, side, kind) if token for c in token.comments] 4425 comments = (join_comments or []) + comments 4426 4427 if ( 4428 self.ADD_JOIN_ON_TRUE 4429 and not kwargs.get("on") 4430 and not kwargs.get("using") 4431 and not kwargs.get("method") 4432 and kwargs.get("kind") in (None, "INNER", "OUTER") 4433 ): 4434 kwargs["on"] = exp.true() 4435 4436 if directed: 4437 kwargs["directed"] = directed 4438 4439 return self.expression(exp.Join(**kwargs), comments=comments) 4440 4441 def _parse_opclass(self) -> exp.Expr | None: 4442 this = self._parse_disjunction() 4443 4444 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 4445 return this 4446 4447 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 4448 return self.expression(exp.Opclass(this=this, expression=self._parse_table_parts())) 4449 4450 return this 4451 4452 def _parse_index_params(self) -> exp.IndexParameters: 4453 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 4454 4455 if self._match(TokenType.L_PAREN, advance=False): 4456 columns = self._parse_wrapped_csv(self._parse_with_operator) 4457 else: 4458 columns = None 4459 4460 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 4461 partition_by = self._parse_partition_by() 4462 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 4463 tablespace = ( 4464 self._parse_var(any_token=True) 4465 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 4466 else None 4467 ) 4468 where = self._parse_where() 4469 4470 on = self._parse_field() if self._match(TokenType.ON) else None 4471 4472 return self.expression( 4473 exp.IndexParameters( 4474 using=using, 4475 columns=columns, 4476 include=include, 4477 partition_by=partition_by, 4478 where=where, 4479 with_storage=with_storage, 4480 tablespace=tablespace, 4481 on=on, 4482 ) 4483 ) 4484 4485 def _parse_index( 4486 self, index: exp.Expr | None = None, anonymous: bool = False 4487 ) -> exp.Index | None: 4488 if index or anonymous: 4489 unique = None 4490 primary = None 4491 amp = None 4492 4493 self._match(TokenType.ON) 4494 self._match(TokenType.TABLE) # hive 4495 table = self._parse_table_parts(schema=True) 4496 else: 4497 unique = self._match(TokenType.UNIQUE) 4498 primary = self._match_text_seq("PRIMARY") 4499 amp = self._match_text_seq("AMP") 4500 4501 if not self._match(TokenType.INDEX): 4502 return None 4503 4504 index = self._parse_id_var() 4505 table = None 4506 4507 params = self._parse_index_params() 4508 4509 return self.expression( 4510 exp.Index( 4511 this=index, table=table, unique=unique, primary=primary, amp=amp, params=params 4512 ) 4513 ) 4514 4515 def _parse_table_hints(self) -> list[exp.Expr] | None: 4516 hints: list[exp.Expr] = [] 4517 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 4518 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 4519 hints.append( 4520 self.expression( 4521 exp.WithTableHint( 4522 expressions=self._parse_csv( 4523 lambda: self._parse_function() or self._parse_var(any_token=True) 4524 ) 4525 ) 4526 ) 4527 ) 4528 self._match_r_paren() 4529 else: 4530 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 4531 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 4532 hint = exp.IndexTableHint(this=self._prev.text.upper()) 4533 4534 self._match_set((TokenType.INDEX, TokenType.KEY)) 4535 if self._match(TokenType.FOR): 4536 hint.set("target", self._advance_any() and self._prev.text.upper()) 4537 4538 hint.set("expressions", self._parse_wrapped_id_vars()) 4539 hints.append(hint) 4540 4541 return hints or None 4542 4543 def _parse_table_part(self, schema: bool = False) -> exp.Expr | None: 4544 return ( 4545 (not schema and self._parse_function(optional_parens=False)) 4546 or self._parse_id_var(any_token=False) 4547 or self._parse_string_as_identifier() 4548 or self._parse_placeholder() 4549 ) 4550 4551 def _parse_table_parts_fast(self) -> exp.Table | None: 4552 index = self._index 4553 parts: list[exp.Identifier] | None = None 4554 all_comments: list[str] | None = None 4555 4556 while self._match_set(self.IDENTIFIER_TOKENS): 4557 token = self._prev 4558 comments = self._prev_comments 4559 4560 has_dot = self._match(TokenType.DOT) 4561 curr_tt = self._curr.token_type 4562 4563 if not has_dot: 4564 if curr_tt in self.TABLE_POSTFIX_TOKENS: 4565 self._retreat(index) 4566 return None 4567 elif curr_tt not in self.IDENTIFIER_TOKENS: 4568 self._retreat(index) 4569 return None 4570 4571 if parts is None: 4572 parts = [] 4573 4574 if comments: 4575 if all_comments is None: 4576 all_comments = [] 4577 all_comments.extend(comments) 4578 self._prev_comments = [] 4579 4580 parts.append( 4581 self.expression( 4582 exp.Identifier( 4583 this=token.text, quoted=token.token_type == TokenType.IDENTIFIER 4584 ), 4585 token, 4586 ) 4587 ) 4588 4589 if not has_dot: 4590 break 4591 4592 if parts is None: 4593 return None 4594 4595 n = len(parts) 4596 4597 if n == 1: 4598 table: exp.Table = exp.Table(this=parts[0]) 4599 elif n == 2: 4600 table = exp.Table(this=parts[1], db=parts[0]) 4601 elif n >= 3: 4602 this: exp.Identifier | exp.Dot = parts[2] 4603 for i in range(3, n): 4604 this = exp.Dot(this=this, expression=parts[i]) 4605 4606 table = exp.Table(this=this, db=parts[1], catalog=parts[0]) 4607 4608 if table is None: 4609 self._retreat(index) 4610 elif all_comments: 4611 table.add_comments(all_comments) 4612 return table 4613 4614 def _parse_table_parts( 4615 self, 4616 schema: bool = False, 4617 is_db_reference: bool = False, 4618 wildcard: bool = False, 4619 fast: bool = False, 4620 ) -> exp.Table | exp.Dot | None: 4621 if fast: 4622 return self._parse_table_parts_fast() 4623 4624 catalog: exp.Expr | str | None = None 4625 db: exp.Expr | str | None = None 4626 table: exp.Expr | str | None = self._parse_table_part(schema=schema) 4627 4628 while self._match(TokenType.DOT): 4629 if catalog: 4630 # This allows nesting the table in arbitrarily many dot expressions if needed 4631 table = self.expression( 4632 exp.Dot(this=table, expression=self._parse_table_part(schema=schema)) 4633 ) 4634 else: 4635 catalog = db 4636 db = table 4637 # "" used for tsql FROM a..b case 4638 table = self._parse_table_part(schema=schema) or "" 4639 4640 if ( 4641 wildcard 4642 and self._is_connected() 4643 and (isinstance(table, exp.Identifier) or not table) 4644 and self._match(TokenType.STAR) 4645 ): 4646 if isinstance(table, exp.Identifier): 4647 table.args["this"] += "*" 4648 else: 4649 table = exp.Identifier(this="*") 4650 4651 if is_db_reference: 4652 catalog = db 4653 db = table 4654 table = None 4655 4656 if not table and not is_db_reference: 4657 self.raise_error(f"Expected table name but got {self._curr}") 4658 if not db and is_db_reference: 4659 self.raise_error(f"Expected database name but got {self._curr}") 4660 4661 table = self.expression(exp.Table(this=table, db=db, catalog=catalog)) 4662 4663 # Bubble up comments from identifier parts to the Table 4664 comments = [] 4665 for part in table.parts: 4666 if part_comments := part.pop_comments(): 4667 comments.extend(part_comments) 4668 if comments: 4669 table.add_comments(comments) 4670 4671 changes = self._parse_changes() 4672 if changes: 4673 table.set("changes", changes) 4674 4675 at_before = self._parse_historical_data() 4676 if at_before: 4677 table.set("when", at_before) 4678 4679 pivots = self._parse_pivots() 4680 if pivots: 4681 table.set("pivots", pivots) 4682 4683 return table 4684 4685 def _parse_table( 4686 self, 4687 schema: bool = False, 4688 joins: bool = False, 4689 alias_tokens: t.Collection[TokenType] | None = None, 4690 parse_bracket: bool = False, 4691 is_db_reference: bool = False, 4692 parse_partition: bool = False, 4693 consume_pipe: bool = False, 4694 ) -> exp.Expr | None: 4695 if not schema and not is_db_reference and not consume_pipe and not joins: 4696 index = self._index 4697 table = self._parse_table_parts(fast=True) 4698 4699 if table is not None: 4700 curr_tt = self._curr.token_type 4701 next_tt = self._next.token_type 4702 4703 fast_terminators = self.TABLE_TERMINATORS 4704 4705 # only return the table if we're sure there are no other operators 4706 # MATCH_CONDITION is a special case because it accepts any alias before it like LIMIT 4707 if curr_tt in fast_terminators and next_tt != TokenType.MATCH_CONDITION: 4708 return table 4709 4710 postfix_tokens = self.TABLE_POSTFIX_TOKENS 4711 4712 if curr_tt not in postfix_tokens and next_tt not in postfix_tokens: 4713 if alias := self._parse_table_alias( 4714 alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 4715 ): 4716 table.set("alias", alias) 4717 4718 if self._curr.token_type in fast_terminators: 4719 return table 4720 4721 self._retreat(index) 4722 4723 if stream := self._parse_stream(): 4724 return stream 4725 4726 if lateral := self._parse_lateral(): 4727 return lateral 4728 4729 if unnest := self._parse_unnest(): 4730 return unnest 4731 4732 if values := self._parse_derived_table_values(): 4733 return values 4734 4735 if subquery := self._parse_select(table=True, consume_pipe=consume_pipe): 4736 if not subquery.args.get("pivots"): 4737 subquery.set("pivots", self._parse_pivots()) 4738 return subquery 4739 4740 bracket = parse_bracket and self._parse_bracket(None) 4741 bracket = self.expression(exp.Table(this=bracket)) if bracket else None 4742 4743 rows_from_tables = ( 4744 self._parse_wrapped_csv(self._parse_table) 4745 if self._match_text_seq("ROWS", "FROM") 4746 else None 4747 ) 4748 rows_from = ( 4749 self.expression(exp.Table(rows_from=rows_from_tables)) if rows_from_tables else None 4750 ) 4751 4752 only = self._match(TokenType.ONLY) 4753 4754 this = t.cast( 4755 exp.Expr, 4756 bracket 4757 or rows_from 4758 or self._parse_bracket( 4759 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4760 ), 4761 ) 4762 4763 if only: 4764 this.set("only", only) 4765 4766 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4767 self._match(TokenType.STAR) 4768 4769 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4770 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4771 this.set("partition", self._parse_partition()) 4772 4773 if schema: 4774 return self._parse_schema(this=this) 4775 4776 if self.dialect.ALIAS_POST_VERSION: 4777 this.set("version", self._parse_version()) 4778 4779 if self.dialect.ALIAS_POST_TABLESAMPLE: 4780 this.set("sample", self._parse_table_sample()) 4781 4782 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4783 if alias: 4784 this.set("alias", alias) 4785 4786 if self._match(TokenType.INDEXED_BY): 4787 this.set("indexed", self._parse_table_parts()) 4788 elif self._match_text_seq("NOT", "INDEXED"): 4789 this.set("indexed", False) 4790 4791 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4792 return self.expression( 4793 exp.AtIndex(this=this.to_column(copy=False), expression=self._parse_id_var()) 4794 ) 4795 4796 this.set("hints", self._parse_table_hints()) 4797 4798 if not this.args.get("pivots"): 4799 this.set("pivots", self._parse_pivots()) 4800 4801 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4802 this.set("sample", self._parse_table_sample()) 4803 4804 if not self.dialect.ALIAS_POST_VERSION: 4805 this.set("version", self._parse_version()) 4806 4807 if joins: 4808 for join in self._parse_joins(): 4809 this.append("joins", join) 4810 4811 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4812 this.set("ordinality", True) 4813 this.set("alias", self._parse_table_alias()) 4814 4815 return this 4816 4817 def _parse_version(self) -> exp.Version | None: 4818 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4819 this = "TIMESTAMP" 4820 elif self._match(TokenType.VERSION_SNAPSHOT): 4821 this = "VERSION" 4822 else: 4823 return None 4824 4825 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4826 kind = self._prev.text.upper() 4827 start = self._parse_bitwise() 4828 self._match_texts(("TO", "AND")) 4829 end = self._parse_bitwise() 4830 expression: exp.Expr | None = self.expression(exp.Tuple(expressions=[start, end])) 4831 elif self._match_text_seq("CONTAINED", "IN"): 4832 kind = "CONTAINED IN" 4833 expression = self.expression( 4834 exp.Tuple(expressions=self._parse_wrapped_csv(self._parse_bitwise)) 4835 ) 4836 elif self._match(TokenType.ALL): 4837 kind = "ALL" 4838 expression = None 4839 else: 4840 self._match_text_seq("AS", "OF") 4841 kind = "AS OF" 4842 expression = self._parse_type() 4843 4844 return self.expression(exp.Version(this=this, expression=expression, kind=kind)) 4845 4846 def _parse_historical_data(self) -> exp.HistoricalData | None: 4847 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4848 index = self._index 4849 historical_data = None 4850 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4851 this = self._prev.text.upper() 4852 kind = ( 4853 self._match(TokenType.L_PAREN) 4854 and self._match_texts(self.HISTORICAL_DATA_KIND) 4855 and self._prev.text.upper() 4856 ) 4857 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4858 4859 if expression: 4860 self._match_r_paren() 4861 historical_data = self.expression( 4862 exp.HistoricalData(this=this, kind=kind, expression=expression) 4863 ) 4864 else: 4865 self._retreat(index) 4866 4867 return historical_data 4868 4869 def _parse_changes(self) -> exp.Changes | None: 4870 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4871 return None 4872 4873 information = self._parse_var(any_token=True) 4874 self._match_r_paren() 4875 4876 return self.expression( 4877 exp.Changes( 4878 information=information, 4879 at_before=self._parse_historical_data(), 4880 end=self._parse_historical_data(), 4881 ) 4882 ) 4883 4884 def _parse_unnest(self, with_alias: bool = True) -> exp.Unnest | None: 4885 if not self._match_pair(TokenType.UNNEST, TokenType.L_PAREN, advance=False): 4886 return None 4887 4888 self._advance() 4889 4890 expressions = self._parse_wrapped_csv(self._parse_equality) 4891 offset: bool | exp.Expr = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4892 4893 alias = self._parse_table_alias() if with_alias else None 4894 4895 if alias: 4896 if self.dialect.UNNEST_COLUMN_ONLY: 4897 if alias.args.get("columns"): 4898 self.raise_error("Unexpected extra column alias in unnest.") 4899 4900 alias.set("columns", [alias.this]) 4901 alias.set("this", None) 4902 4903 columns = alias.args.get("columns") or [] 4904 if offset and len(expressions) < len(columns): 4905 offset = columns.pop() 4906 4907 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4908 self._match(TokenType.ALIAS) 4909 offset = self._parse_id_var( 4910 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4911 ) or exp.to_identifier("offset") 4912 4913 return self.expression(exp.Unnest(expressions=expressions, alias=alias, offset=offset)) 4914 4915 def _parse_derived_table_values(self) -> exp.Values | None: 4916 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4917 if not is_derived and not ( 4918 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4919 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4920 ): 4921 return None 4922 4923 expressions = self._parse_csv(self._parse_value) 4924 alias = self._parse_table_alias() 4925 4926 if is_derived: 4927 self._match_r_paren() 4928 4929 return self.expression( 4930 exp.Values(expressions=expressions, alias=alias or self._parse_table_alias()) 4931 ) 4932 4933 def _parse_table_sample(self, as_modifier: bool = False) -> exp.TableSample | None: 4934 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4935 as_modifier and self._match_text_seq("USING", "SAMPLE") 4936 ): 4937 return None 4938 4939 bucket_numerator = None 4940 bucket_denominator = None 4941 bucket_field = None 4942 percent = None 4943 size = None 4944 seed = None 4945 4946 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4947 matched_l_paren = self._match(TokenType.L_PAREN) 4948 4949 if self.TABLESAMPLE_CSV: 4950 num = None 4951 expressions = self._parse_csv(self._parse_primary) 4952 else: 4953 expressions = None 4954 num = ( 4955 self._parse_factor() 4956 if self._match(TokenType.NUMBER, advance=False) 4957 else self._parse_primary() or self._parse_placeholder() 4958 ) 4959 4960 if self._match_text_seq("BUCKET"): 4961 bucket_numerator = self._parse_number() 4962 self._match_text_seq("OUT", "OF") 4963 bucket_denominator = bucket_denominator = self._parse_number() 4964 self._match(TokenType.ON) 4965 bucket_field = self._parse_field() 4966 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4967 percent = num 4968 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4969 size = num 4970 else: 4971 percent = num 4972 4973 if matched_l_paren: 4974 self._match_r_paren() 4975 4976 if self._match(TokenType.L_PAREN): 4977 method = self._parse_var(upper=True) 4978 seed = self._match(TokenType.COMMA) and self._parse_number() 4979 self._match_r_paren() 4980 elif self._match_texts(("SEED", "REPEATABLE")): 4981 seed = self._parse_wrapped(self._parse_number) 4982 4983 if not method and self.DEFAULT_SAMPLING_METHOD: 4984 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4985 4986 return self.expression( 4987 exp.TableSample( 4988 expressions=expressions, 4989 method=method, 4990 bucket_numerator=bucket_numerator, 4991 bucket_denominator=bucket_denominator, 4992 bucket_field=bucket_field, 4993 percent=percent, 4994 size=size, 4995 seed=seed, 4996 ) 4997 ) 4998 4999 def _parse_pivots(self) -> list[exp.Pivot] | None: 5000 return list(iter(self._parse_pivot, None)) or None 5001 5002 def _parse_joins(self) -> t.Iterator[exp.Join]: 5003 return iter(self._parse_join, None) 5004 5005 def _parse_unpivot_columns(self) -> exp.UnpivotColumns | None: 5006 if not self._match(TokenType.INTO): 5007 return None 5008 5009 return self.expression( 5010 exp.UnpivotColumns( 5011 this=self._match_text_seq("NAME") and self._parse_column(), 5012 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 5013 ) 5014 ) 5015 5016 # https://duckdb.org/docs/sql/statements/pivot 5017 def _parse_simplified_pivot(self, is_unpivot: bool | None = None) -> exp.Pivot: 5018 def _parse_on() -> exp.Expr | None: 5019 this = self._parse_bitwise() 5020 5021 if self._match(TokenType.IN): 5022 # PIVOT ... ON col IN (row_val1, row_val2) 5023 return self._parse_in(this) 5024 if self._match(TokenType.ALIAS, advance=False): 5025 # UNPIVOT ... ON (col1, col2, col3) AS row_val 5026 return self._parse_alias(this) 5027 5028 return this 5029 5030 this = self._parse_table() 5031 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 5032 into = self._parse_unpivot_columns() 5033 using = self._match(TokenType.USING) and self._parse_csv( 5034 lambda: self._parse_alias(self._parse_column()) 5035 ) 5036 group = self._parse_group() 5037 5038 return self.expression( 5039 exp.Pivot( 5040 this=this, 5041 expressions=expressions, 5042 using=using, 5043 group=group, 5044 unpivot=is_unpivot, 5045 into=into, 5046 ) 5047 ) 5048 5049 def _parse_pivot_in(self) -> exp.In: 5050 def _parse_aliased_expression() -> exp.Expr | None: 5051 this = self._parse_select_or_expression() 5052 5053 self._match(TokenType.ALIAS) 5054 alias = self._parse_bitwise() 5055 if alias: 5056 if isinstance(alias, exp.Column) and not alias.db: 5057 alias = alias.this 5058 return self.expression(exp.PivotAlias(this=this, alias=alias)) 5059 5060 return this 5061 5062 value = self._parse_column() 5063 5064 if not self._match(TokenType.IN): 5065 self.raise_error("Expecting IN") 5066 5067 if self._match(TokenType.L_PAREN): 5068 if self._match(TokenType.ANY): 5069 exprs: list[exp.Expr] = ensure_list(exp.PivotAny(this=self._parse_order())) 5070 else: 5071 exprs = self._parse_csv(_parse_aliased_expression) 5072 self._match_r_paren() 5073 return self.expression(exp.In(this=value, expressions=exprs)) 5074 5075 return self.expression(exp.In(this=value, field=self._parse_id_var())) 5076 5077 def _parse_pivot_aggregation(self) -> exp.Expr | None: 5078 func = self._parse_function() 5079 if not func: 5080 if self._prev.token_type == TokenType.COMMA: 5081 return None 5082 self.raise_error("Expecting an aggregation function in PIVOT") 5083 5084 return self._parse_alias(func) 5085 5086 def _parse_pivot(self) -> exp.Pivot | None: 5087 index = self._index 5088 include_nulls = None 5089 5090 if self._match(TokenType.PIVOT): 5091 unpivot = False 5092 elif self._match(TokenType.UNPIVOT): 5093 unpivot = True 5094 5095 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 5096 if self._match_text_seq("INCLUDE", "NULLS"): 5097 include_nulls = True 5098 elif self._match_text_seq("EXCLUDE", "NULLS"): 5099 include_nulls = False 5100 else: 5101 return None 5102 5103 expressions = [] 5104 5105 if not self._match(TokenType.L_PAREN): 5106 self._retreat(index) 5107 return None 5108 5109 if unpivot: 5110 expressions = self._parse_csv(self._parse_column) 5111 else: 5112 expressions = self._parse_csv(self._parse_pivot_aggregation) 5113 5114 if not expressions: 5115 self.raise_error("Failed to parse PIVOT's aggregation list") 5116 5117 if not self._match(TokenType.FOR): 5118 self.raise_error("Expecting FOR") 5119 5120 fields = [] 5121 while True: 5122 field = self._try_parse(self._parse_pivot_in) 5123 if not field: 5124 break 5125 fields.append(field) 5126 5127 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 5128 self._parse_bitwise 5129 ) 5130 5131 group = self._parse_group() 5132 5133 self._match_r_paren() 5134 5135 pivot = self.expression( 5136 exp.Pivot( 5137 expressions=expressions, 5138 fields=fields, 5139 unpivot=unpivot, 5140 include_nulls=include_nulls, 5141 default_on_null=default_on_null, 5142 group=group, 5143 ) 5144 ) 5145 5146 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 5147 pivot.set("alias", self._parse_table_alias()) 5148 5149 if not unpivot: 5150 names = self._pivot_column_names(t.cast(list[exp.Expr], expressions)) 5151 5152 columns: list[exp.Expr] = [] 5153 all_fields = [] 5154 for pivot_field in pivot.fields: 5155 pivot_field_expressions = pivot_field.expressions 5156 5157 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 5158 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 5159 continue 5160 5161 all_fields.append( 5162 [ 5163 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 5164 for fld in pivot_field_expressions 5165 ] 5166 ) 5167 5168 if all_fields: 5169 if names: 5170 all_fields.append(names) 5171 5172 # Generate all possible combinations of the pivot columns 5173 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 5174 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 5175 for fld_parts_tuple in itertools.product(*all_fields): 5176 fld_parts = list(fld_parts_tuple) 5177 5178 if names and self.PREFIXED_PIVOT_COLUMNS: 5179 # Move the "name" to the front of the list 5180 fld_parts.insert(0, fld_parts.pop(-1)) 5181 5182 columns.append(exp.to_identifier("_".join(fld_parts))) 5183 5184 pivot.set("columns", columns) 5185 5186 return pivot 5187 5188 def _pivot_column_names(self, aggregations: list[exp.Expr]) -> list[str]: 5189 return [agg.alias for agg in aggregations if agg.alias] 5190 5191 def _parse_prewhere(self, skip_where_token: bool = False) -> exp.PreWhere | None: 5192 if not skip_where_token and not self._match(TokenType.PREWHERE): 5193 return None 5194 5195 comments = self._prev_comments 5196 return self.expression( 5197 exp.PreWhere(this=self._parse_disjunction()), 5198 comments=comments, 5199 ) 5200 5201 def _parse_where(self, skip_where_token: bool = False) -> exp.Where | None: 5202 if not skip_where_token and not self._match(TokenType.WHERE): 5203 return None 5204 5205 comments = self._prev_comments 5206 return self.expression( 5207 exp.Where(this=self._parse_disjunction()), 5208 comments=comments, 5209 ) 5210 5211 def _parse_group(self, skip_group_by_token: bool = False) -> exp.Group | None: 5212 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 5213 return None 5214 comments = self._prev_comments 5215 5216 elements: dict[str, t.Any] = defaultdict(list) 5217 5218 if self._match(TokenType.ALL): 5219 elements["all"] = True 5220 elif self._match(TokenType.DISTINCT): 5221 elements["all"] = False 5222 5223 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 5224 return self.expression(exp.Group(**elements), comments=comments) # type: ignore 5225 5226 while True: 5227 index = self._index 5228 5229 elements["expressions"].extend( 5230 self._parse_csv( 5231 lambda: ( 5232 None 5233 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 5234 else self._parse_disjunction() 5235 ) 5236 ) 5237 ) 5238 5239 before_with_index = self._index 5240 with_prefix = self._match(TokenType.WITH) 5241 5242 if cube_or_rollup := self._parse_cube_or_rollup(with_prefix=with_prefix): 5243 key = "rollup" if isinstance(cube_or_rollup, exp.Rollup) else "cube" 5244 elements[key].append(cube_or_rollup) 5245 elif grouping_sets := self._parse_grouping_sets(): 5246 elements["grouping_sets"].append(grouping_sets) 5247 elif self._match_text_seq("TOTALS"): 5248 elements["totals"] = True # type: ignore 5249 5250 if before_with_index <= self._index <= before_with_index + 1: 5251 self._retreat(before_with_index) 5252 break 5253 5254 if index == self._index: 5255 break 5256 5257 return self.expression(exp.Group(**elements), comments=comments) # type: ignore 5258 5259 def _parse_cube_or_rollup(self, with_prefix: bool = False) -> exp.Cube | exp.Rollup | None: 5260 if self._match(TokenType.CUBE): 5261 kind: type[exp.Cube | exp.Rollup] = exp.Cube 5262 elif self._match(TokenType.ROLLUP): 5263 kind = exp.Rollup 5264 else: 5265 return None 5266 5267 return self.expression( 5268 kind(expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_bitwise)) 5269 ) 5270 5271 def _parse_grouping_sets(self) -> exp.GroupingSets | None: 5272 if self._match(TokenType.GROUPING_SETS): 5273 return self.expression( 5274 exp.GroupingSets(expressions=self._parse_wrapped_csv(self._parse_grouping_set)) 5275 ) 5276 return None 5277 5278 def _parse_grouping_set(self) -> exp.Expr | None: 5279 return self._parse_grouping_sets() or self._parse_cube_or_rollup() or self._parse_bitwise() 5280 5281 def _parse_having(self, skip_having_token: bool = False) -> exp.Having | None: 5282 if not skip_having_token and not self._match(TokenType.HAVING): 5283 return None 5284 comments = self._prev_comments 5285 return self.expression( 5286 exp.Having(this=self._parse_disjunction()), 5287 comments=comments, 5288 ) 5289 5290 def _parse_qualify(self) -> exp.Qualify | None: 5291 if not self._match(TokenType.QUALIFY): 5292 return None 5293 return self.expression(exp.Qualify(this=self._parse_disjunction())) 5294 5295 def _parse_connect_with_prior(self) -> exp.Expr | None: 5296 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 5297 exp.Prior(this=self._parse_bitwise()) 5298 ) 5299 connect = self._parse_disjunction() 5300 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 5301 return connect 5302 5303 def _parse_connect(self, skip_start_token: bool = False) -> exp.Connect | None: 5304 if skip_start_token: 5305 start = None 5306 elif self._match(TokenType.START_WITH): 5307 start = self._parse_disjunction() 5308 else: 5309 return None 5310 5311 self._match(TokenType.CONNECT_BY) 5312 nocycle = self._match_text_seq("NOCYCLE") 5313 connect = self._parse_connect_with_prior() 5314 5315 if not start and self._match(TokenType.START_WITH): 5316 start = self._parse_disjunction() 5317 5318 return self.expression(exp.Connect(start=start, connect=connect, nocycle=nocycle)) 5319 5320 def _parse_name_as_expression(self) -> exp.Expr | None: 5321 this = self._parse_id_var(any_token=True) 5322 if self._match(TokenType.ALIAS): 5323 this = self.expression(exp.Alias(alias=this, this=self._parse_disjunction())) 5324 return this 5325 5326 def _parse_interpolate(self) -> list[exp.Expr] | None: 5327 if self._match_text_seq("INTERPOLATE"): 5328 return self._parse_wrapped_csv(self._parse_name_as_expression) 5329 return None 5330 5331 def _parse_order( 5332 self, this: exp.Expr | None = None, skip_order_token: bool = False 5333 ) -> exp.Expr | None: 5334 siblings = None 5335 if not skip_order_token and not self._match(TokenType.ORDER_BY): 5336 if not self._match(TokenType.ORDER_SIBLINGS_BY): 5337 return this 5338 5339 siblings = True 5340 5341 comments = self._prev_comments 5342 return self.expression( 5343 exp.Order( 5344 this=this, 5345 expressions=self._parse_csv(self._parse_ordered), 5346 siblings=siblings, 5347 ), 5348 comments=comments, 5349 ) 5350 5351 def _parse_sort(self, exp_class: type[E], token: TokenType) -> E | None: 5352 if not self._match(token): 5353 return None 5354 return self.expression(exp_class(expressions=self._parse_csv(self._parse_ordered))) 5355 5356 def _parse_ordered( 5357 self, parse_method: t.Callable[[], exp.Expr | None] | None = None 5358 ) -> exp.Ordered | None: 5359 this = parse_method() if parse_method else self._parse_disjunction() 5360 if not this: 5361 return None 5362 5363 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 5364 this = exp.var("ALL") 5365 5366 asc = self._match(TokenType.ASC) 5367 desc: bool | None = True if self._match(TokenType.DESC) else (False if asc else None) 5368 5369 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 5370 is_nulls_last = self._match_text_seq("NULLS", "LAST") 5371 5372 nulls_first = is_nulls_first or False 5373 explicitly_null_ordered = is_nulls_first or is_nulls_last 5374 5375 if ( 5376 not explicitly_null_ordered 5377 and ( 5378 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 5379 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 5380 ) 5381 and self.dialect.NULL_ORDERING != "nulls_are_last" 5382 ): 5383 nulls_first = True 5384 5385 if self._match_text_seq("WITH", "FILL"): 5386 with_fill = self.expression( 5387 exp.WithFill( 5388 from_=self._match(TokenType.FROM) and self._parse_bitwise(), 5389 to=self._match_text_seq("TO") and self._parse_bitwise(), 5390 step=self._match_text_seq("STEP") and self._parse_bitwise(), 5391 interpolate=self._parse_interpolate(), 5392 ) 5393 ) 5394 else: 5395 with_fill = None 5396 5397 return self.expression( 5398 exp.Ordered(this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill) 5399 ) 5400 5401 def _parse_limit_options(self) -> exp.LimitOptions | None: 5402 percent = self._match_set((TokenType.PERCENT, TokenType.MOD)) 5403 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 5404 self._match_text_seq("ONLY") 5405 with_ties = self._match_text_seq("WITH", "TIES") 5406 5407 if not (percent or rows or with_ties): 5408 return None 5409 5410 return self.expression(exp.LimitOptions(percent=percent, rows=rows, with_ties=with_ties)) 5411 5412 def _parse_limit( 5413 self, 5414 this: exp.Expr | None = None, 5415 top: bool = False, 5416 skip_limit_token: bool = False, 5417 ) -> exp.Expr | None: 5418 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 5419 comments = self._prev_comments 5420 if top: 5421 limit_paren = self._match(TokenType.L_PAREN) 5422 expression = ( 5423 self._parse_term() or self._parse_select() 5424 if limit_paren 5425 else self._parse_number() 5426 ) 5427 5428 if limit_paren: 5429 self._match_r_paren() 5430 5431 else: 5432 # Parsing LIMIT x% (i.e x PERCENT) as a term leads to an error, since 5433 # we try to build an exp.Mod expr. For that matter, we backtrack and instead 5434 # consume the factor plus parse the percentage separately 5435 index = self._index 5436 expression = self._try_parse(self._parse_term) 5437 if isinstance(expression, exp.Mod): 5438 self._retreat(index) 5439 expression = self._parse_factor() 5440 elif not expression: 5441 expression = self._parse_factor() 5442 limit_options = self._parse_limit_options() 5443 5444 if self._match(TokenType.COMMA): 5445 offset = expression 5446 expression = self._parse_term() 5447 else: 5448 offset = None 5449 5450 limit_exp = self.expression( 5451 exp.Limit( 5452 this=this, 5453 expression=expression, 5454 offset=offset, 5455 limit_options=limit_options, 5456 expressions=self._parse_limit_by(), 5457 ), 5458 comments=comments, 5459 ) 5460 5461 return limit_exp 5462 5463 if self._match(TokenType.FETCH): 5464 direction = ( 5465 self._prev.text.upper() 5466 if self._match_set((TokenType.FIRST, TokenType.NEXT)) 5467 else "FIRST" 5468 ) 5469 5470 count = self._parse_field(tokens=self.FETCH_TOKENS) 5471 5472 return self.expression( 5473 exp.Fetch( 5474 direction=direction, count=count, limit_options=self._parse_limit_options() 5475 ) 5476 ) 5477 5478 return this 5479 5480 def _parse_offset(self, this: exp.Expr | None = None) -> exp.Expr | None: 5481 if not self._match(TokenType.OFFSET): 5482 return this 5483 5484 count = self._parse_term() 5485 self._match_set((TokenType.ROW, TokenType.ROWS)) 5486 5487 return self.expression( 5488 exp.Offset(this=this, expression=count, expressions=self._parse_limit_by()) 5489 ) 5490 5491 def _can_parse_limit_or_offset(self) -> bool: 5492 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 5493 return False 5494 5495 index = self._index 5496 result = bool( 5497 self._try_parse(self._parse_limit, retreat=True) 5498 or self._try_parse(self._parse_offset, retreat=True) 5499 ) 5500 self._retreat(index) 5501 5502 # MATCH_CONDITION (...) is a special construct that should not be consumed by limit/offset 5503 if self._next.token_type == TokenType.MATCH_CONDITION: 5504 result = False 5505 5506 return result 5507 5508 def _parse_limit_by(self) -> list[exp.Expr] | None: 5509 return self._parse_csv(self._parse_bitwise) if self._match_text_seq("BY") else None 5510 5511 def _parse_locks(self) -> list[exp.Lock]: 5512 locks = [] 5513 while True: 5514 update, key = None, None 5515 if self._match_text_seq("FOR", "UPDATE"): 5516 update = True 5517 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 5518 "LOCK", "IN", "SHARE", "MODE" 5519 ): 5520 update = False 5521 elif self._match_text_seq("FOR", "KEY", "SHARE"): 5522 update, key = False, True 5523 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 5524 update, key = True, True 5525 else: 5526 break 5527 5528 expressions = None 5529 if self._match_text_seq("OF"): 5530 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 5531 5532 wait: bool | exp.Expr | None = None 5533 if self._match_text_seq("NOWAIT"): 5534 wait = True 5535 elif self._match_text_seq("WAIT"): 5536 wait = self._parse_primary() 5537 elif self._match_text_seq("SKIP", "LOCKED"): 5538 wait = False 5539 5540 locks.append( 5541 self.expression( 5542 exp.Lock(update=update, expressions=expressions, wait=wait, key=key) 5543 ) 5544 ) 5545 5546 return locks 5547 5548 def parse_set_operation( 5549 self, this: exp.Expr | None, consume_pipe: bool = False 5550 ) -> exp.Expr | None: 5551 start = self._index 5552 _, side_token, kind_token = self._parse_join_parts() 5553 5554 side = side_token.text if side_token else None 5555 kind = kind_token.text if kind_token else None 5556 5557 if not self._match_set(self.SET_OPERATIONS): 5558 self._retreat(start) 5559 return None 5560 5561 token_type = self._prev.token_type 5562 5563 if token_type == TokenType.UNION: 5564 operation: type[exp.SetOperation] = exp.Union 5565 elif token_type == TokenType.EXCEPT: 5566 operation = exp.Except 5567 else: 5568 operation = exp.Intersect 5569 5570 comments = self._prev.comments 5571 5572 if self._match(TokenType.DISTINCT): 5573 distinct: bool | None = True 5574 elif self._match(TokenType.ALL): 5575 distinct = False 5576 else: 5577 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 5578 if distinct is None: 5579 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 5580 5581 by_name = ( 5582 self._match_text_seq("BY", "NAME") 5583 or self._match_text_seq("STRICT", "CORRESPONDING") 5584 or None 5585 ) 5586 if self._match_text_seq("CORRESPONDING"): 5587 by_name = True 5588 if not side and not kind: 5589 kind = "INNER" 5590 5591 on_column_list = None 5592 if by_name and self._match_texts(("ON", "BY")): 5593 on_column_list = self._parse_wrapped_csv(self._parse_column) 5594 5595 expression = self._parse_select( 5596 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 5597 ) 5598 5599 return self.expression( 5600 operation( 5601 this=this, 5602 distinct=distinct, 5603 by_name=by_name, 5604 expression=expression, 5605 side=side, 5606 kind=kind, 5607 on=on_column_list, 5608 ), 5609 comments=comments, 5610 ) 5611 5612 def _parse_set_operations(self, this: exp.Expr | None) -> exp.Expr | None: 5613 while this: 5614 setop = self.parse_set_operation(this) 5615 if not setop: 5616 break 5617 this = setop 5618 5619 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 5620 expression = this.expression 5621 5622 if expression: 5623 for arg in self.SET_OP_MODIFIERS: 5624 expr = expression.args.get(arg) 5625 if expr: 5626 this.set(arg, expr.pop()) 5627 5628 return this 5629 5630 def _parse_expression(self) -> exp.Expr | None: 5631 return self._parse_alias(self._parse_assignment()) 5632 5633 def _parse_assignment(self) -> exp.Expr | None: 5634 this = self._parse_disjunction() 5635 if not this and self._next.token_type in self.ASSIGNMENT: 5636 # This allows us to parse <non-identifier token> := <expr> 5637 this = exp.column( 5638 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 5639 ) 5640 5641 while self._match_set(self.ASSIGNMENT): 5642 if isinstance(this, exp.Column) and len(this.parts) == 1: 5643 this = this.this 5644 5645 comments = self._prev_comments 5646 this = self.expression( 5647 self.ASSIGNMENT[self._prev.token_type]( 5648 this=this, expression=self._parse_assignment() 5649 ), 5650 comments=comments, 5651 ) 5652 5653 return this 5654 5655 def _parse_disjunction(self) -> exp.Expr | None: 5656 this = self._parse_conjunction() 5657 while self._match_set(self.DISJUNCTION): 5658 comments = self._prev_comments 5659 this = self.expression( 5660 self.DISJUNCTION[self._prev.token_type]( 5661 this=this, expression=self._parse_conjunction() 5662 ), 5663 comments=comments, 5664 ) 5665 return this 5666 5667 def _parse_conjunction(self) -> exp.Expr | None: 5668 this = self._parse_equality() 5669 while self._match_set(self.CONJUNCTION): 5670 comments = self._prev_comments 5671 this = self.expression( 5672 self.CONJUNCTION[self._prev.token_type]( 5673 this=this, expression=self._parse_equality() 5674 ), 5675 comments=comments, 5676 ) 5677 return this 5678 5679 def _parse_equality(self) -> exp.Expr | None: 5680 this = self._parse_comparison() 5681 while self._match_set(self.EQUALITY): 5682 comments = self._prev_comments 5683 this = self.expression( 5684 self.EQUALITY[self._prev.token_type]( 5685 this=this, expression=self._parse_comparison() 5686 ), 5687 comments=comments, 5688 ) 5689 return this 5690 5691 def _parse_comparison(self) -> exp.Expr | None: 5692 this = self._parse_range() 5693 while self._match_set(self.COMPARISON): 5694 comments = self._prev_comments 5695 this = self.expression( 5696 self.COMPARISON[self._prev.token_type](this=this, expression=self._parse_range()), 5697 comments=comments, 5698 ) 5699 return this 5700 5701 def _parse_range(self, this: exp.Expr | None = None) -> exp.Expr | None: 5702 this = this or self._parse_bitwise() 5703 negate = self._match(TokenType.NOT) 5704 5705 if self._match_set(self.RANGE_PARSERS): 5706 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 5707 if not expression: 5708 return this 5709 5710 this = expression 5711 elif self._match(TokenType.ISNULL) or (negate and self._match(TokenType.NULL)): 5712 this = self.expression(exp.Is(this=this, expression=exp.Null())) 5713 5714 # Postgres supports ISNULL and NOTNULL for conditions. 5715 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 5716 if self._match(TokenType.NOTNULL): 5717 this = self.expression(exp.Is(this=this, expression=exp.Null())) 5718 this = self.expression(exp.Not(this=this)) 5719 5720 if negate: 5721 this = self._negate_range(this) 5722 5723 if self._match(TokenType.IS): 5724 this = self._parse_is(this) 5725 5726 return this 5727 5728 def _negate_range(self, this: exp.Expr | None = None) -> exp.Expr | None: 5729 if not this: 5730 return this 5731 5732 return self.expression(exp.Not(this=this)) 5733 5734 def _parse_is(self, this: exp.Expr | None) -> exp.Expr | None: 5735 index = self._index - 1 5736 negate = self._match(TokenType.NOT) 5737 5738 if self._match_text_seq("DISTINCT", "FROM"): 5739 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5740 return self.expression(klass(this=this, expression=self._parse_bitwise())) 5741 5742 if self._match(TokenType.JSON): 5743 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5744 5745 if self._match_text_seq("WITH"): 5746 _with = True 5747 elif self._match_text_seq("WITHOUT"): 5748 _with = False 5749 else: 5750 _with = None 5751 5752 unique = self._match(TokenType.UNIQUE) 5753 self._match_text_seq("KEYS") 5754 expression: exp.Expr | None = self.expression( 5755 exp.JSON(this=kind, with_=_with, unique=unique) 5756 ) 5757 else: 5758 expression = self._parse_null() or self._parse_bitwise() 5759 if not expression: 5760 self._retreat(index) 5761 return None 5762 5763 this = self.expression(exp.Is(this=this, expression=expression)) 5764 this = self.expression(exp.Not(this=this)) if negate else this 5765 return self._parse_column_ops(this) 5766 5767 def _parse_in(self, this: exp.Expr | None, alias: bool = False) -> exp.In: 5768 unnest = self._parse_unnest(with_alias=False) 5769 if unnest: 5770 this = self.expression(exp.In(this=this, unnest=unnest)) 5771 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5772 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5773 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5774 5775 if len(expressions) == 1 and isinstance(query := expressions[0], exp.Query): 5776 this = self.expression( 5777 exp.In(this=this, query=self._parse_query_modifiers(query).subquery(copy=False)) 5778 ) 5779 else: 5780 this = self.expression(exp.In(this=this, expressions=expressions)) 5781 5782 if matched_l_paren: 5783 self._match_r_paren(this) 5784 elif not self._match(TokenType.R_BRACKET, expression=this): 5785 self.raise_error("Expecting ]") 5786 else: 5787 this = self.expression(exp.In(this=this, field=self._parse_column())) 5788 5789 return this 5790 5791 def _parse_between(self, this: exp.Expr | None) -> exp.Between: 5792 symmetric = None 5793 if self._match_text_seq("SYMMETRIC"): 5794 symmetric = True 5795 elif self._match_text_seq("ASYMMETRIC"): 5796 symmetric = False 5797 5798 low = self._parse_bitwise() 5799 self._match(TokenType.AND) 5800 high = self._parse_bitwise() 5801 5802 return self.expression(exp.Between(this=this, low=low, high=high, symmetric=symmetric)) 5803 5804 def _parse_escape(self, this: exp.Expr | None) -> exp.Expr | None: 5805 if not self._match(TokenType.ESCAPE): 5806 return this 5807 return self.expression( 5808 exp.Escape(this=this, expression=self._parse_string() or self._parse_null()) 5809 ) 5810 5811 def _parse_interval_span(self, this: exp.Expr) -> exp.Interval: 5812 # handle day-time format interval span with omitted units: 5813 # INTERVAL '<number days> hh[:][mm[:ss[.ff]]]' <maybe `unit TO unit`> 5814 interval_span_units_omitted = None 5815 if ( 5816 this 5817 and this.is_string 5818 and self.SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT 5819 and exp.INTERVAL_DAY_TIME_RE.match(this.name) 5820 ): 5821 index = self._index 5822 5823 # Var "TO" Var 5824 first_unit = self._parse_var(any_token=True, upper=True) 5825 second_unit = None 5826 if first_unit and self._match_text_seq("TO"): 5827 second_unit = self._parse_var(any_token=True, upper=True) 5828 5829 interval_span_units_omitted = not (first_unit and second_unit) 5830 5831 self._retreat(index) 5832 5833 unit = ( 5834 None 5835 if interval_span_units_omitted 5836 else ( 5837 self._parse_function() 5838 or ( 5839 not self._match_set((TokenType.ALIAS, TokenType.DCOLON), advance=False) 5840 and self._parse_var(any_token=True, upper=True) 5841 ) 5842 ) 5843 ) 5844 5845 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5846 # each INTERVAL expression into this canonical form so it's easy to transpile 5847 if this and this.is_number: 5848 this = exp.Literal.string(this.to_py()) 5849 elif this and this.is_string: 5850 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5851 if parts and unit: 5852 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5853 unit = None 5854 self._retreat(self._index - 1) 5855 5856 if len(parts) == 1: 5857 this = exp.Literal.string(parts[0][0]) 5858 unit = self.expression(exp.Var(this=parts[0][1].upper())) 5859 5860 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5861 unit = self.expression( 5862 exp.IntervalSpan( 5863 this=unit, 5864 expression=self._parse_function() 5865 or self._parse_var(any_token=True, upper=True), 5866 ) 5867 ) 5868 5869 return self.expression(exp.Interval(this=this, unit=unit)) 5870 5871 def _parse_interval(self, require_interval: bool = True) -> exp.Add | exp.Interval | None: 5872 index = self._index 5873 5874 if not self._match(TokenType.INTERVAL) and require_interval: 5875 return None 5876 5877 if self._match(TokenType.STRING, advance=False): 5878 this = self._parse_primary() 5879 else: 5880 this = self._parse_term() 5881 5882 if not this or ( 5883 isinstance(this, exp.Column) 5884 and not this.table 5885 and not this.this.quoted 5886 and self._curr 5887 and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS 5888 ): 5889 self._retreat(index) 5890 return None 5891 5892 interval = self._parse_interval_span(this) 5893 5894 index = self._index 5895 self._match(TokenType.PLUS) 5896 5897 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 5898 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5899 return self.expression(exp.Add(this=interval, expression=self._parse_interval(False))) 5900 5901 self._retreat(index) 5902 return interval 5903 5904 def _parse_bitwise(self) -> exp.Expr | None: 5905 this = self._parse_term() 5906 5907 while True: 5908 if self._match_set(self.BITWISE): 5909 this = self.expression( 5910 self.BITWISE[self._prev.token_type](this=this, expression=self._parse_term()) 5911 ) 5912 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5913 this = self.expression( 5914 exp.DPipe( 5915 this=this, 5916 expression=self._parse_term(), 5917 safe=not self.dialect.STRICT_STRING_CONCAT, 5918 ) 5919 ) 5920 elif self._match(TokenType.DQMARK): 5921 this = self.expression( 5922 exp.Coalesce(this=this, expressions=ensure_list(self._parse_term())) 5923 ) 5924 elif self._match_pair(TokenType.LT, TokenType.LT): 5925 this = self.expression( 5926 exp.BitwiseLeftShift(this=this, expression=self._parse_term()) 5927 ) 5928 elif self._match_pair(TokenType.GT, TokenType.GT): 5929 this = self.expression( 5930 exp.BitwiseRightShift(this=this, expression=self._parse_term()) 5931 ) 5932 else: 5933 break 5934 5935 return this 5936 5937 def _parse_term(self) -> exp.Expr | None: 5938 this = self._parse_factor() 5939 5940 while self._match_set(self.TERM): 5941 klass = self.TERM[self._prev.token_type] 5942 comments = self._prev_comments 5943 expression = self._parse_factor() 5944 5945 this = self.expression(klass(this=this, expression=expression), comments=comments) 5946 5947 if isinstance(this, exp.Collate): 5948 expr = this.expression 5949 5950 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5951 # fallback to Identifier / Var 5952 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5953 ident = expr.this 5954 if isinstance(ident, exp.Identifier): 5955 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5956 5957 return this 5958 5959 def _parse_factor(self) -> exp.Expr | None: 5960 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5961 this = self._parse_at_time_zone(parse_method()) 5962 5963 while self._match_set(self.FACTOR): 5964 klass = self.FACTOR[self._prev.token_type] 5965 comments = self._prev_comments 5966 expression = parse_method() 5967 5968 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5969 self._retreat(self._index - 1) 5970 return this 5971 5972 this = self.expression(klass(this=this, expression=expression), comments=comments) 5973 5974 if isinstance(this, exp.Div): 5975 this.set("typed", self.dialect.TYPED_DIVISION) 5976 this.set("safe", self.dialect.SAFE_DIVISION) 5977 5978 return this 5979 5980 def _parse_exponent(self) -> exp.Expr | None: 5981 this = self._parse_unary() 5982 while self._match_set(self.EXPONENT): 5983 comments = self._prev_comments 5984 this = self.expression( 5985 self.EXPONENT[self._prev.token_type](this=this, expression=self._parse_unary()), 5986 comments=comments, 5987 ) 5988 return this 5989 5990 def _parse_unary(self) -> exp.Expr | None: 5991 if self._match_set(self.UNARY_PARSERS): 5992 return self.UNARY_PARSERS[self._prev.token_type](self) 5993 return self._parse_type() 5994 5995 def _parse_type( 5996 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5997 ) -> exp.Expr | None: 5998 if not fallback_to_identifier and (atom := self._parse_atom()) is not None: 5999 return atom 6000 6001 if interval := parse_interval and self._parse_interval(): 6002 return self._parse_column_ops(interval) 6003 6004 index = self._index 6005 data_type = self._parse_types(check_func=True, allow_identifiers=False) 6006 6007 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 6008 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 6009 if isinstance(data_type, exp.Cast): 6010 # This constructor can contain ops directly after it, for instance struct unnesting: 6011 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 6012 return self._parse_column_ops(data_type) 6013 6014 if data_type: 6015 index2 = self._index 6016 this = self._parse_primary() 6017 6018 if isinstance(this, exp.Literal): 6019 literal = this.name 6020 this = self._parse_column_ops(this) 6021 6022 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 6023 if parser: 6024 return parser(self, this, data_type) 6025 6026 if ( 6027 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 6028 and data_type.is_type(exp.DType.TIMESTAMP) 6029 and TIME_ZONE_RE.search(literal) 6030 ): 6031 data_type = exp.DType.TIMESTAMPTZ.into_expr() 6032 6033 return self.expression(exp.Cast(this=this, to=data_type)) 6034 6035 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 6036 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 6037 # 6038 # If the index difference here is greater than 1, that means the parser itself must have 6039 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 6040 # 6041 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 6042 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 6043 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 6044 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 6045 # 6046 # In these cases, we don't really want to return the converted type, but instead retreat 6047 # and try to parse a Column or Identifier in the section below. 6048 if data_type.expressions and index2 - index > 1: 6049 self._retreat(index2) 6050 return self._parse_column_ops(data_type) 6051 6052 self._retreat(index) 6053 6054 if fallback_to_identifier: 6055 return self._parse_id_var() 6056 6057 return self._parse_column() 6058 6059 def _parse_type_size(self) -> exp.DataTypeParam | None: 6060 this = self._parse_type() 6061 if not this: 6062 return None 6063 6064 if isinstance(this, exp.Column) and not this.table: 6065 this = exp.var(this.name.upper()) 6066 6067 return self.expression( 6068 exp.DataTypeParam(this=this, expression=self._parse_var(any_token=True)) 6069 ) 6070 6071 def _parse_user_defined_type(self, identifier: exp.Identifier) -> exp.Expr | None: 6072 type_name = identifier.name 6073 6074 while self._match(TokenType.DOT): 6075 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 6076 6077 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 6078 6079 def _parse_types( 6080 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 6081 ) -> exp.Expr | None: 6082 index = self._index 6083 this: exp.Expr | None = None 6084 6085 if self._match_set(self.TYPE_TOKENS): 6086 type_token = self._prev.token_type 6087 else: 6088 type_token = None 6089 identifier = allow_identifiers and self._parse_id_var( 6090 any_token=False, tokens=(TokenType.VAR,) 6091 ) 6092 if isinstance(identifier, exp.Identifier): 6093 try: 6094 tokens = self.dialect.tokenize(identifier.name) 6095 except TokenError: 6096 tokens = None 6097 6098 if tokens and (type_token := tokens[0].token_type) in self.TYPE_TOKENS: 6099 if len(tokens) > 1: 6100 return exp.DataType.build(identifier.name, dialect=self.dialect) 6101 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 6102 this = self._parse_user_defined_type(identifier) 6103 else: 6104 self._retreat(self._index - 1) 6105 return None 6106 else: 6107 return None 6108 6109 if type_token == TokenType.PSEUDO_TYPE: 6110 return self.expression(exp.PseudoType(this=self._prev.text.upper())) 6111 6112 if type_token == TokenType.OBJECT_IDENTIFIER: 6113 return self.expression(exp.ObjectIdentifier(this=self._prev.text.upper())) 6114 6115 # https://materialize.com/docs/sql/types/map/ 6116 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 6117 key_type = self._parse_types( 6118 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 6119 ) 6120 if not self._match(TokenType.FARROW): 6121 self._retreat(index) 6122 return None 6123 6124 value_type = self._parse_types( 6125 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 6126 ) 6127 if not self._match(TokenType.R_BRACKET): 6128 self._retreat(index) 6129 return None 6130 6131 return exp.DataType( 6132 this=exp.DType.MAP, 6133 expressions=[key_type, value_type], 6134 nested=True, 6135 ) 6136 6137 nested = type_token in self.NESTED_TYPE_TOKENS 6138 is_struct = type_token in self.STRUCT_TYPE_TOKENS 6139 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 6140 expressions = None 6141 maybe_func = False 6142 6143 if self._match(TokenType.L_PAREN): 6144 if is_struct: 6145 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 6146 elif nested: 6147 expressions = self._parse_csv( 6148 lambda: self._parse_types( 6149 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 6150 ) 6151 ) 6152 if type_token == TokenType.NULLABLE and len(expressions) == 1: 6153 this = expressions[0] 6154 this.set("nullable", True) 6155 self._match_r_paren() 6156 return this 6157 elif type_token in self.ENUM_TYPE_TOKENS: 6158 expressions = self._parse_csv(self._parse_equality) 6159 elif type_token == TokenType.JSON: 6160 # ClickHouse JSON type supports arguments: JSON(col Type, SKIP col, param=value) 6161 # https://clickhouse.com/docs/sql-reference/data-types/newjson 6162 expressions = self._parse_csv(self._parse_json_type_arg) 6163 elif is_aggregate: 6164 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 6165 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 6166 ) 6167 if not func_or_ident: 6168 return None 6169 expressions = [func_or_ident] 6170 if self._match(TokenType.COMMA): 6171 expressions.extend( 6172 self._parse_csv( 6173 lambda: self._parse_types( 6174 check_func=check_func, 6175 schema=schema, 6176 allow_identifiers=allow_identifiers, 6177 ) 6178 ) 6179 ) 6180 else: 6181 expressions = self._parse_csv(self._parse_type_size) 6182 6183 # https://docs.snowflake.com/en/sql-reference/data-types-vector 6184 if type_token == TokenType.VECTOR and len(expressions) == 2: 6185 expressions = self._parse_vector_expressions(expressions) 6186 6187 if not self._match(TokenType.R_PAREN): 6188 self._retreat(index) 6189 return None 6190 6191 maybe_func = True 6192 6193 values: list[exp.Expr] | None = None 6194 6195 if nested and self._match(TokenType.LT): 6196 if is_struct: 6197 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 6198 else: 6199 expressions = self._parse_csv( 6200 lambda: self._parse_types( 6201 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 6202 ) 6203 ) 6204 6205 if not self._match(TokenType.GT): 6206 self.raise_error("Expecting >") 6207 6208 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 6209 values = self._parse_csv(self._parse_disjunction) 6210 if not values and is_struct: 6211 values = None 6212 self._retreat(self._index - 1) 6213 else: 6214 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 6215 6216 if type_token in self.TIMESTAMPS: 6217 if self._match_text_seq("WITH", "TIME", "ZONE"): 6218 maybe_func = False 6219 tz_type = exp.DType.TIMETZ if type_token in self.TIMES else exp.DType.TIMESTAMPTZ 6220 this = exp.DataType(this=tz_type, expressions=expressions) 6221 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 6222 maybe_func = False 6223 this = exp.DataType(this=exp.DType.TIMESTAMPLTZ, expressions=expressions) 6224 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 6225 maybe_func = False 6226 elif type_token == TokenType.INTERVAL: 6227 if self._curr.text.upper() in self.dialect.VALID_INTERVAL_UNITS: 6228 unit = self._parse_var(upper=True) 6229 if self._match_text_seq("TO"): 6230 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 6231 6232 this = self.expression(exp.DataType(this=self.expression(exp.Interval(unit=unit)))) 6233 else: 6234 this = self.expression(exp.DataType(this=exp.DType.INTERVAL)) 6235 elif type_token == TokenType.VOID: 6236 this = exp.DataType(this=exp.DType.NULL) 6237 6238 if maybe_func and check_func: 6239 index2 = self._index 6240 peek = self._parse_string() 6241 6242 if not peek: 6243 self._retreat(index) 6244 return None 6245 6246 self._retreat(index2) 6247 6248 if not this: 6249 assert type_token is not None 6250 if self._match_text_seq("UNSIGNED"): 6251 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 6252 if not unsigned_type_token: 6253 self.raise_error(f"Cannot convert {type_token.name} to unsigned.") 6254 6255 type_token = unsigned_type_token or type_token 6256 6257 # NULLABLE without parentheses can be a column (Presto/Trino) 6258 if type_token == TokenType.NULLABLE and not expressions: 6259 self._retreat(index) 6260 return None 6261 6262 this = exp.DataType( 6263 this=exp.DType[type_token.name], 6264 expressions=expressions, 6265 nested=nested, 6266 ) 6267 6268 # Empty arrays/structs are allowed 6269 if values is not None: 6270 cls = exp.Struct if is_struct else exp.Array 6271 this = exp.cast(cls(expressions=values), this, copy=False) 6272 6273 elif expressions: 6274 this.set("expressions", expressions) 6275 6276 # https://materialize.com/docs/sql/types/list/#type-name 6277 while self._match(TokenType.LIST): 6278 this = exp.DataType(this=exp.DType.LIST, expressions=[this], nested=True) 6279 6280 index = self._index 6281 6282 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 6283 matched_array = self._match(TokenType.ARRAY) 6284 6285 while self._curr: 6286 datatype_token = self._prev.token_type 6287 matched_l_bracket = self._match(TokenType.L_BRACKET) 6288 6289 if (not matched_l_bracket and not matched_array) or ( 6290 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 6291 ): 6292 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 6293 # not to be confused with the fixed size array parsing 6294 break 6295 6296 matched_array = False 6297 values = self._parse_csv(self._parse_disjunction) or None 6298 if ( 6299 values 6300 and not schema 6301 and ( 6302 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS 6303 or datatype_token == TokenType.ARRAY 6304 or not self._match(TokenType.R_BRACKET, advance=False) 6305 ) 6306 ): 6307 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 6308 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 6309 self._retreat(index) 6310 break 6311 6312 this = exp.DataType( 6313 this=exp.DType.ARRAY, expressions=[this], values=values, nested=True 6314 ) 6315 self._match(TokenType.R_BRACKET) 6316 6317 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DType): 6318 converter = self.TYPE_CONVERTERS.get(this.this) 6319 if converter: 6320 this = converter(t.cast(exp.DataType, this)) 6321 6322 return this 6323 6324 def _parse_json_type_arg(self) -> exp.Expr | None: 6325 """Parse a single argument to ClickHouse's JSON type.""" 6326 6327 # SKIP col or SKIP REGEXP 'pattern' 6328 if self._match_text_seq("SKIP"): 6329 regexp = self._match(TokenType.RLIKE) 6330 arg = self._parse_column() 6331 if isinstance(arg, exp.Column): 6332 arg = arg.to_dot() 6333 return self.expression(exp.SkipJSONColumn(regexp=regexp, expression=arg)) 6334 6335 param_or_col = self._parse_column() 6336 if not isinstance(param_or_col, exp.Column): 6337 return None 6338 6339 # Parameter: name=value (e.g., max_dynamic_paths=2) 6340 if len(param_or_col.parts) == 1 and self._match(TokenType.EQ): 6341 param = param_or_col.name 6342 value = self._parse_primary() 6343 return self.expression(exp.EQ(this=exp.var(param), expression=value)) 6344 6345 # Column type hint: col_name Type 6346 col = param_or_col.to_dot() 6347 kind = self._parse_types(check_func=False, allow_identifiers=False) 6348 return self.expression(exp.ColumnDef(this=col, kind=kind)) 6349 6350 def _parse_vector_expressions(self, expressions: list[exp.Expr]) -> list[exp.Expr]: 6351 return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]] 6352 6353 def _parse_struct_types(self, type_required: bool = False) -> exp.Expr | None: 6354 index = self._index 6355 6356 if ( 6357 self._curr 6358 and self._next 6359 and self._curr.token_type in self.TYPE_TOKENS 6360 and self._next.token_type in self.TYPE_TOKENS 6361 ): 6362 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 6363 # type token. Without this, the list will be parsed as a type and we'll eventually crash 6364 this = self._parse_id_var() 6365 else: 6366 this = ( 6367 self._parse_type(parse_interval=False, fallback_to_identifier=True) 6368 or self._parse_id_var() 6369 ) 6370 6371 self._match(TokenType.COLON) 6372 6373 if ( 6374 type_required 6375 and not isinstance(this, exp.DataType) 6376 and not self._match_set(self.TYPE_TOKENS, advance=False) 6377 ): 6378 self._retreat(index) 6379 return self._parse_types() 6380 6381 return self._parse_column_def(this) 6382 6383 def _parse_at_time_zone(self, this: exp.Expr | None) -> exp.Expr | None: 6384 if not self._match_text_seq("AT", "TIME", "ZONE"): 6385 return this 6386 return self._parse_at_time_zone( 6387 self.expression(exp.AtTimeZone(this=this, zone=self._parse_unary())) 6388 ) 6389 6390 def _parse_atom(self) -> exp.Expr | None: 6391 if ( 6392 self._curr.token_type in self.IDENTIFIER_TOKENS 6393 and (column := self._parse_column()) is not None 6394 ): 6395 return column 6396 6397 token = self._curr 6398 token_type = token.token_type 6399 6400 if not (primary_parser := self.PRIMARY_PARSERS.get(token_type)): 6401 return None 6402 6403 next_type = self._next.token_type 6404 6405 if ( 6406 next_type in self.COLUMN_OPERATORS 6407 or next_type in self.COLUMN_POSTFIX_TOKENS 6408 or (token_type == TokenType.STRING and next_type == TokenType.STRING) 6409 ): 6410 return None 6411 6412 self._advance() 6413 return primary_parser(self, token) 6414 6415 def _parse_column(self) -> exp.Expr | None: 6416 column: exp.Expr | None = self._parse_column_parts_fast() 6417 if column is None: 6418 this = self._parse_column_reference() 6419 if not this: 6420 this = self._parse_bracket(this) 6421 column = self._parse_column_ops(this) if this else this 6422 6423 if column: 6424 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS: 6425 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 6426 if self.COLON_IS_VARIANT_EXTRACT: 6427 column = self._parse_colon_as_variant_extract(column) 6428 6429 return column 6430 6431 def _parse_column_parts_fast(self) -> exp.Column | exp.Dot | None: 6432 """Fast path for simple column and dot references (a, a.b, ...). 6433 6434 Greedily consumes VAR/IDENTIFIER tokens separated by DOTs, then checks 6435 that nothing complex follows. If it does, retreats and returns None so 6436 the slow path can handle it. For >4 parts, wraps in exp.Dot nodes. 6437 """ 6438 index = self._index 6439 parts: list[exp.Identifier] | None = None 6440 all_comments: list[str] | None = None 6441 6442 while self._match_set(self.IDENTIFIER_TOKENS): 6443 token = self._prev 6444 comments = self._prev_comments 6445 6446 if parts is None and token.text.upper() in self.NO_PAREN_FUNCTION_PARSERS: 6447 self._retreat(index) 6448 return None 6449 6450 has_dot = self._match(TokenType.DOT) 6451 curr_tt = self._curr.token_type 6452 6453 if not has_dot: 6454 if curr_tt in self.COLUMN_OPERATORS or curr_tt in self.COLUMN_POSTFIX_TOKENS: 6455 self._retreat(index) 6456 return None 6457 elif curr_tt not in self.IDENTIFIER_TOKENS: 6458 self._retreat(index) 6459 return None 6460 6461 if parts is None: 6462 parts = [] 6463 6464 if comments: 6465 if all_comments is None: 6466 all_comments = [] 6467 all_comments.extend(comments) 6468 self._prev_comments = [] 6469 6470 parts.append( 6471 self.expression( 6472 exp.Identifier( 6473 this=token.text, quoted=token.token_type == TokenType.IDENTIFIER 6474 ), 6475 token, 6476 ) 6477 ) 6478 6479 if not has_dot: 6480 break 6481 6482 if parts is None: 6483 return None 6484 6485 n = len(parts) 6486 6487 if n == 1: 6488 column: exp.Column | exp.Dot = exp.Column(this=parts[0]) 6489 elif n == 2: 6490 column = exp.Column(this=parts[1], table=parts[0]) 6491 elif n == 3: 6492 column = exp.Column(this=parts[2], table=parts[1], db=parts[0]) 6493 else: 6494 column = exp.Column(this=parts[3], table=parts[2], db=parts[1], catalog=parts[0]) 6495 6496 for i in range(4, n): 6497 column = exp.Dot(this=column, expression=parts[i]) 6498 6499 if all_comments: 6500 column.add_comments(all_comments) 6501 6502 return column 6503 6504 def _parse_column_reference(self) -> exp.Expr | None: 6505 this = self._parse_field() 6506 if ( 6507 not this 6508 and self._match(TokenType.VALUES, advance=False) 6509 and self.VALUES_FOLLOWED_BY_PAREN 6510 and (not self._next or self._next.token_type != TokenType.L_PAREN) 6511 ): 6512 this = self._parse_id_var() 6513 6514 if isinstance(this, exp.Identifier): 6515 # We bubble up comments from the Identifier to the Column 6516 this = self.expression(exp.Column(this=this), comments=this.pop_comments()) 6517 6518 return this 6519 6520 def _build_json_extract( 6521 self, 6522 this: exp.Expr | None, 6523 path_parts: list[exp.JSONPathPart], 6524 escape: bool | None, 6525 ) -> tuple[exp.Expr | None, list[exp.JSONPathPart]]: 6526 if len(path_parts) > 1: 6527 this = self.expression( 6528 exp.JSONExtract( 6529 this=this, 6530 expression=exp.JSONPath(expressions=path_parts, escape=escape), 6531 variant_extract=True, 6532 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 6533 ) 6534 ) 6535 path_parts = [exp.JSONPathRoot()] 6536 6537 return this, path_parts 6538 6539 def _parse_colon_as_variant_extract(self, this: exp.Expr | None) -> exp.Expr | None: 6540 path_parts: list[exp.JSONPathPart] = [exp.JSONPathRoot()] 6541 escape = None 6542 6543 while self._match(TokenType.COLON): 6544 key = self._parse_id_var(any_token=True, tokens=(TokenType.SELECT,)) 6545 6546 if key: 6547 if isinstance(key, exp.Identifier) and key.quoted: 6548 escape = True 6549 path_parts.append(exp.JSONPathKey(this=key.name)) 6550 6551 while True: 6552 if self._match(TokenType.DOT): 6553 next_key = self._parse_id_var(any_token=True, tokens=(TokenType.SELECT,)) 6554 6555 if next_key: 6556 if isinstance(next_key, exp.Identifier) and next_key.quoted: 6557 escape = True 6558 path_parts.append(exp.JSONPathKey(this=next_key.name)) 6559 elif self._match(TokenType.L_BRACKET): 6560 bracket_expr = self._parse_bracket_key_value() 6561 6562 if not self._match(TokenType.R_BRACKET): 6563 self.raise_error("Expected ]") 6564 6565 if bracket_expr: 6566 if bracket_expr.is_string: 6567 path_parts.append(exp.JSONPathKey(this=bracket_expr.name)) 6568 escape = True 6569 elif bracket_expr.is_star: 6570 path_parts.append(exp.JSONPathSubscript(this=exp.JSONPathWildcard())) 6571 elif bracket_expr.is_number: 6572 path_parts.append(exp.JSONPathSubscript(this=bracket_expr.to_py())) 6573 else: 6574 this, path_parts = self._build_json_extract(this, path_parts, escape) 6575 escape = None 6576 6577 this = self.expression( 6578 exp.Bracket( 6579 this=this, expressions=[bracket_expr], json_access=True 6580 ), 6581 ) 6582 6583 elif self._match(TokenType.DCOLON): 6584 this, path_parts = self._build_json_extract(this, path_parts, escape) 6585 escape = None 6586 6587 cast_type = self._parse_types() 6588 if cast_type: 6589 this = self.expression(exp.Cast(this=this, to=cast_type)) 6590 else: 6591 self.raise_error("Expected type after '::'") 6592 else: 6593 break 6594 6595 this, _ = self._build_json_extract(this, path_parts, escape) 6596 6597 return this 6598 6599 def _parse_dcolon(self) -> exp.Expr | None: 6600 return self._parse_types() 6601 6602 def _parse_column_ops(self, this: exp.Expr | None) -> exp.Expr | None: 6603 while self._curr.token_type in self.BRACKETS: 6604 this = self._parse_bracket(this) 6605 6606 column_operators = self.COLUMN_OPERATORS 6607 cast_column_operators = self.CAST_COLUMN_OPERATORS 6608 while self._curr: 6609 op_token = self._curr.token_type 6610 6611 if op_token not in column_operators: 6612 break 6613 op = column_operators[op_token] 6614 self._advance() 6615 6616 if op_token in cast_column_operators: 6617 field = self._parse_dcolon() 6618 if not field: 6619 self.raise_error("Expected type") 6620 elif op and self._curr: 6621 field = self._parse_column_reference() or self._parse_bitwise() 6622 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 6623 field = self._parse_column_ops(field) 6624 else: 6625 field = self._parse_field(any_token=True, anonymous_func=True) 6626 6627 # Function calls can be qualified, e.g., x.y.FOO() 6628 # This converts the final AST to a series of Dots leading to the function call 6629 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 6630 if isinstance(field, (exp.Func, exp.Window)) and this: 6631 this = this.transform( 6632 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 6633 ) 6634 6635 if op: 6636 this = op(self, this, field) 6637 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 6638 this = self.expression( 6639 exp.Column( 6640 this=field, 6641 table=this.this, 6642 db=this.args.get("table"), 6643 catalog=this.args.get("db"), 6644 ), 6645 comments=this.comments, 6646 ) 6647 elif isinstance(field, exp.Window): 6648 # Move the exp.Dot's to the window's function 6649 window_func = self.expression(exp.Dot(this=this, expression=field.this)) 6650 field.set("this", window_func) 6651 this = field 6652 else: 6653 this = self.expression(exp.Dot(this=this, expression=field)) 6654 6655 if field and field.comments: 6656 t.cast(exp.Expr, this).add_comments(field.pop_comments()) 6657 6658 this = self._parse_bracket(this) 6659 6660 return this 6661 6662 def _parse_paren(self) -> exp.Expr | None: 6663 if not self._match(TokenType.L_PAREN): 6664 return None 6665 6666 comments = self._prev_comments 6667 query = self._parse_select() 6668 6669 if query: 6670 expressions = [query] 6671 else: 6672 expressions = self._parse_expressions() 6673 6674 this = seq_get(expressions, 0) 6675 6676 if not this and self._match(TokenType.R_PAREN, advance=False): 6677 this = self.expression(exp.Tuple()) 6678 elif isinstance(this, exp.UNWRAPPED_QUERIES): 6679 this = self._parse_subquery(this=this, parse_alias=False) 6680 elif isinstance(this, (exp.Subquery, exp.Values)): 6681 this = self._parse_subquery( 6682 this=self._parse_query_modifiers(self._parse_set_operations(this)), 6683 parse_alias=False, 6684 ) 6685 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 6686 this = self.expression(exp.Tuple(expressions=expressions)) 6687 else: 6688 this = self.expression(exp.Paren(this=this)) 6689 6690 if this: 6691 this.add_comments(comments) 6692 6693 self._match_r_paren(expression=this) 6694 6695 if isinstance(this, exp.Paren) and isinstance(this.this, exp.AggFunc): 6696 return self._parse_window(this) 6697 6698 return this 6699 6700 def _parse_primary(self) -> exp.Expr | None: 6701 if self._match_set(self.PRIMARY_PARSERS): 6702 token_type = self._prev.token_type 6703 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 6704 6705 if token_type == TokenType.STRING: 6706 expressions = [primary] 6707 while self._match(TokenType.STRING): 6708 expressions.append(exp.Literal.string(self._prev.text)) 6709 6710 if len(expressions) > 1: 6711 return self.expression( 6712 exp.Concat(expressions=expressions, coalesce=self.dialect.CONCAT_COALESCE) 6713 ) 6714 6715 return primary 6716 6717 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 6718 return exp.Literal.number(f"0.{self._prev.text}") 6719 6720 return self._parse_paren() 6721 6722 def _parse_field( 6723 self, 6724 any_token: bool = False, 6725 tokens: t.Collection[TokenType] | None = None, 6726 anonymous_func: bool = False, 6727 ) -> exp.Expr | None: 6728 if anonymous_func: 6729 field = ( 6730 self._parse_function(anonymous=anonymous_func, any_token=any_token) 6731 or self._parse_primary() 6732 ) 6733 else: 6734 field = self._parse_primary() or self._parse_function( 6735 anonymous=anonymous_func, any_token=any_token 6736 ) 6737 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 6738 6739 def _parse_function( 6740 self, 6741 functions: dict[str, t.Callable] | None = None, 6742 anonymous: bool = False, 6743 optional_parens: bool = True, 6744 any_token: bool = False, 6745 ) -> exp.Expr | None: 6746 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 6747 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 6748 fn_syntax = False 6749 if ( 6750 self._match(TokenType.L_BRACE, advance=False) 6751 and self._next 6752 and self._next.text.upper() == "FN" 6753 ): 6754 self._advance(2) 6755 fn_syntax = True 6756 6757 func = self._parse_function_call( 6758 functions=functions, 6759 anonymous=anonymous, 6760 optional_parens=optional_parens, 6761 any_token=any_token, 6762 ) 6763 6764 if fn_syntax: 6765 self._match(TokenType.R_BRACE) 6766 6767 return func 6768 6769 def _parse_function_args(self, alias: bool = False) -> list[exp.Expr]: 6770 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 6771 6772 def _parse_function_call( 6773 self, 6774 functions: dict[str, t.Callable] | None = None, 6775 anonymous: bool = False, 6776 optional_parens: bool = True, 6777 any_token: bool = False, 6778 ) -> exp.Expr | None: 6779 if not self._curr: 6780 return None 6781 6782 comments = self._curr.comments 6783 prev = self._prev 6784 token = self._curr 6785 token_type = self._curr.token_type 6786 this: str | exp.Expr = self._curr.text 6787 upper = self._curr.text.upper() 6788 6789 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 6790 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 6791 self._advance() 6792 return self._parse_window(parser(self)) 6793 6794 if self._next.token_type != TokenType.L_PAREN: 6795 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 6796 self._advance() 6797 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]()) 6798 6799 return None 6800 6801 if any_token: 6802 if token_type in self.RESERVED_TOKENS: 6803 return None 6804 elif token_type not in self.FUNC_TOKENS: 6805 return None 6806 6807 self._advance(2) 6808 6809 parser = self.FUNCTION_PARSERS.get(upper) 6810 if parser and not anonymous: 6811 result = parser(self) 6812 else: 6813 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 6814 6815 if subquery_predicate: 6816 expr = None 6817 if self._curr.token_type in self.SUBQUERY_TOKENS: 6818 expr = self._parse_select() 6819 self._match_r_paren() 6820 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 6821 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 6822 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 6823 self._advance(-1) 6824 expr = self._parse_bitwise() 6825 6826 if expr: 6827 return self.expression(subquery_predicate(this=expr), comments=comments) 6828 6829 if functions is None: 6830 functions = self.FUNCTIONS 6831 6832 function = functions.get(upper) 6833 known_function = function and not anonymous 6834 6835 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 6836 args = self._parse_function_args(alias) 6837 6838 post_func_comments = self._curr.comments if self._curr else None 6839 if known_function and post_func_comments: 6840 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 6841 # call we'll construct it as exp.Anonymous, even if it's "known" 6842 if any( 6843 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 6844 for comment in post_func_comments 6845 ): 6846 known_function = False 6847 6848 if alias and known_function: 6849 args = self._kv_to_prop_eq(args) 6850 6851 if known_function: 6852 func_builder = t.cast(t.Callable, function) 6853 6854 # mypyc compiled functions don't have __code__, so we use 6855 # try/except to check if func_builder accepts 'dialect'. 6856 try: 6857 func = func_builder(args) 6858 except TypeError: 6859 func = func_builder(args, dialect=self.dialect) 6860 6861 func = self.validate_expression(func, args) 6862 if self.dialect.PRESERVE_ORIGINAL_NAMES: 6863 func.meta["name"] = this 6864 6865 result = func 6866 else: 6867 if token_type == TokenType.IDENTIFIER: 6868 this = exp.Identifier(this=this, quoted=True).update_positions(token) 6869 6870 result = self.expression(exp.Anonymous(this=this, expressions=args)) 6871 6872 result = result.update_positions(token) 6873 6874 if isinstance(result, exp.Expr): 6875 result.add_comments(comments) 6876 6877 if parser: 6878 self._match(TokenType.R_PAREN, expression=result) 6879 else: 6880 self._match_r_paren(result) 6881 return self._parse_window(result) 6882 6883 def _to_prop_eq(self, expression: exp.Expr, index: int) -> exp.Expr: 6884 return expression 6885 6886 def _kv_to_prop_eq( 6887 self, expressions: list[exp.Expr], parse_map: bool = False 6888 ) -> list[exp.Expr]: 6889 transformed = [] 6890 6891 for index, e in enumerate(expressions): 6892 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 6893 if isinstance(e, exp.Alias): 6894 e = self.expression(exp.PropertyEQ(this=e.args.get("alias"), expression=e.this)) 6895 6896 if not isinstance(e, exp.PropertyEQ): 6897 e = self.expression( 6898 exp.PropertyEQ( 6899 this=e.this if parse_map else exp.to_identifier(e.this.name), 6900 expression=e.expression, 6901 ) 6902 ) 6903 6904 if isinstance(e.this, exp.Column): 6905 e.this.replace(e.this.this) 6906 else: 6907 e = self._to_prop_eq(e, index) 6908 6909 transformed.append(e) 6910 6911 return transformed 6912 6913 def _parse_function_properties(self) -> exp.Properties | None: 6914 return self._parse_properties() 6915 6916 def _parse_user_defined_function_expression(self) -> exp.Expr | None: 6917 return self._parse_statement() 6918 6919 def _parse_function_parameter(self) -> exp.Expr | None: 6920 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 6921 6922 def _parse_user_defined_function(self, kind: TokenType | None = None) -> exp.Expr | None: 6923 this = self._parse_table_parts(schema=True) 6924 6925 if not self._match(TokenType.L_PAREN): 6926 return this 6927 6928 expressions = self._parse_csv(self._parse_function_parameter) 6929 self._match_r_paren() 6930 return self.expression( 6931 exp.UserDefinedFunction(this=this, expressions=expressions, wrapped=True) 6932 ) 6933 6934 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 6935 literal = self._parse_primary() 6936 if literal: 6937 return self.expression(exp.Introducer(this=token.text, expression=literal), token) 6938 6939 return self._identifier_expression(token) 6940 6941 def _parse_session_parameter(self) -> exp.SessionParameter: 6942 kind = None 6943 this = self._parse_id_var() or self._parse_primary() 6944 6945 if this and self._match(TokenType.DOT): 6946 kind = this.name 6947 this = self._parse_var() or self._parse_primary() 6948 6949 return self.expression(exp.SessionParameter(this=this, kind=kind)) 6950 6951 def _parse_lambda_arg(self) -> exp.Expr | None: 6952 return self._parse_id_var() 6953 6954 def _parse_lambda(self, alias: bool = False) -> exp.Expr | None: 6955 next_token_type = self._next.token_type 6956 6957 # Fast path: simple atom (column, literal, null, bool) followed by , or ) 6958 if ( 6959 next_token_type in self.LAMBDA_ARG_TERMINATORS 6960 and (atom := self._parse_atom()) is not None 6961 ): 6962 return atom 6963 6964 index = self._index 6965 6966 if self._match(TokenType.L_PAREN): 6967 expressions = t.cast( 6968 list[t.Optional[exp.Expr]], self._parse_csv(self._parse_lambda_arg) 6969 ) 6970 6971 if not self._match(TokenType.R_PAREN): 6972 self._retreat(index) 6973 elif self._match_set(self.LAMBDAS): 6974 return self.LAMBDAS[self._prev.token_type](self, expressions) 6975 else: 6976 self._retreat(index) 6977 elif self.TYPED_LAMBDA_ARGS or next_token_type in self.LAMBDAS: 6978 expressions = [self._parse_lambda_arg()] 6979 6980 if self._match_set(self.LAMBDAS): 6981 return self.LAMBDAS[self._prev.token_type](self, expressions) 6982 6983 self._retreat(index) 6984 6985 this: exp.Expr | None 6986 6987 if self._match(TokenType.DISTINCT): 6988 this = self.expression( 6989 exp.Distinct(expressions=self._parse_csv(self._parse_disjunction)) 6990 ) 6991 else: 6992 this = self._parse_select_or_expression(alias=alias) 6993 6994 return self._parse_limit( 6995 self._parse_respect_or_ignore_nulls( 6996 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6997 ) 6998 ) 6999 7000 def _parse_schema(self, this: exp.Expr | None = None) -> exp.Expr | None: 7001 index = self._index 7002 if not self._match(TokenType.L_PAREN): 7003 return this 7004 7005 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 7006 # expr can be of both types 7007 if self._match_set(self.SELECT_START_TOKENS): 7008 self._retreat(index) 7009 return this 7010 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 7011 self._match_r_paren() 7012 return self.expression(exp.Schema(this=this, expressions=args)) 7013 7014 def _parse_field_def(self) -> exp.Expr | None: 7015 return self._parse_column_def(self._parse_field(any_token=True)) 7016 7017 def _parse_column_def( 7018 self, this: exp.Expr | None, computed_column: bool = True 7019 ) -> exp.Expr | None: 7020 # column defs are not really columns, they're identifiers 7021 if isinstance(this, exp.Column): 7022 this = this.this 7023 7024 if not computed_column: 7025 self._match(TokenType.ALIAS) 7026 7027 kind = self._parse_types(schema=True) 7028 7029 if self._match_text_seq("FOR", "ORDINALITY"): 7030 return self.expression(exp.ColumnDef(this=this, ordinality=True)) 7031 7032 constraints: list[exp.Expr] = [] 7033 7034 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 7035 ("ALIAS", "MATERIALIZED") 7036 ): 7037 persisted = self._prev.text.upper() == "MATERIALIZED" 7038 constraint_kind = exp.ComputedColumnConstraint( 7039 this=self._parse_disjunction(), 7040 persisted=persisted or self._match_text_seq("PERSISTED"), 7041 data_type=exp.Var(this="AUTO") 7042 if self._match_text_seq("AUTO") 7043 else self._parse_types(), 7044 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 7045 ) 7046 constraints.append(self.expression(exp.ColumnConstraint(kind=constraint_kind))) 7047 elif not kind and self._match_set({TokenType.IN, TokenType.OUT}, advance=False): 7048 in_out_constraint = self.expression( 7049 exp.InOutColumnConstraint( 7050 input_=self._match(TokenType.IN), output=self._match(TokenType.OUT) 7051 ) 7052 ) 7053 constraints.append(in_out_constraint) 7054 kind = self._parse_types() 7055 elif ( 7056 kind 7057 and self._match(TokenType.ALIAS, advance=False) 7058 and ( 7059 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 7060 or self._next.token_type == TokenType.L_PAREN 7061 ) 7062 ): 7063 self._advance() 7064 constraints.append( 7065 self.expression( 7066 exp.ColumnConstraint( 7067 kind=exp.ComputedColumnConstraint( 7068 this=self._parse_disjunction(), 7069 persisted=self._match_texts(("STORED", "VIRTUAL")) 7070 and self._prev.text.upper() == "STORED", 7071 ) 7072 ) 7073 ) 7074 ) 7075 7076 while True: 7077 constraint = self._parse_column_constraint() 7078 if not constraint: 7079 break 7080 constraints.append(constraint) 7081 7082 if not kind and not constraints: 7083 return this 7084 7085 return self.expression(exp.ColumnDef(this=this, kind=kind, constraints=constraints)) 7086 7087 def _parse_auto_increment( 7088 self, 7089 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 7090 start = None 7091 increment = None 7092 order = None 7093 7094 if self._match(TokenType.L_PAREN, advance=False): 7095 args = self._parse_wrapped_csv(self._parse_bitwise) 7096 start = seq_get(args, 0) 7097 increment = seq_get(args, 1) 7098 elif self._match_text_seq("START"): 7099 start = self._parse_bitwise() 7100 self._match_text_seq("INCREMENT") 7101 increment = self._parse_bitwise() 7102 if self._match_text_seq("ORDER"): 7103 order = True 7104 elif self._match_text_seq("NOORDER"): 7105 order = False 7106 7107 if start and increment: 7108 return exp.GeneratedAsIdentityColumnConstraint( 7109 start=start, increment=increment, this=False, order=order 7110 ) 7111 7112 return exp.AutoIncrementColumnConstraint() 7113 7114 def _parse_check_constraint(self) -> exp.CheckColumnConstraint | None: 7115 if not self._match(TokenType.L_PAREN, advance=False): 7116 return None 7117 7118 return self.expression( 7119 exp.CheckColumnConstraint( 7120 this=self._parse_wrapped(self._parse_assignment), 7121 enforced=self._match_text_seq("ENFORCED"), 7122 ) 7123 ) 7124 7125 def _parse_auto_property(self) -> exp.AutoRefreshProperty | None: 7126 if not self._match_text_seq("REFRESH"): 7127 self._retreat(self._index - 1) 7128 return None 7129 return self.expression(exp.AutoRefreshProperty(this=self._parse_var(upper=True))) 7130 7131 def _parse_compress(self) -> exp.CompressColumnConstraint: 7132 if self._match(TokenType.L_PAREN, advance=False): 7133 return self.expression( 7134 exp.CompressColumnConstraint(this=self._parse_wrapped_csv(self._parse_bitwise)) 7135 ) 7136 7137 return self.expression(exp.CompressColumnConstraint(this=self._parse_bitwise())) 7138 7139 def _parse_generated_as_identity( 7140 self, 7141 ) -> ( 7142 exp.GeneratedAsIdentityColumnConstraint 7143 | exp.ComputedColumnConstraint 7144 | exp.GeneratedAsRowColumnConstraint 7145 ): 7146 if self._match_text_seq("BY", "DEFAULT"): 7147 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 7148 this = self.expression( 7149 exp.GeneratedAsIdentityColumnConstraint(this=False, on_null=on_null) 7150 ) 7151 else: 7152 self._match_text_seq("ALWAYS") 7153 this = self.expression(exp.GeneratedAsIdentityColumnConstraint(this=True)) 7154 7155 self._match(TokenType.ALIAS) 7156 7157 if self._match_text_seq("ROW"): 7158 start = self._match_text_seq("START") 7159 if not start: 7160 self._match(TokenType.END) 7161 hidden = self._match_text_seq("HIDDEN") 7162 return self.expression(exp.GeneratedAsRowColumnConstraint(start=start, hidden=hidden)) 7163 7164 identity = self._match_text_seq("IDENTITY") 7165 7166 if self._match(TokenType.L_PAREN): 7167 if self._match(TokenType.START_WITH): 7168 this.set("start", self._parse_bitwise()) 7169 if self._match_text_seq("INCREMENT", "BY"): 7170 this.set("increment", self._parse_bitwise()) 7171 if self._match_text_seq("MINVALUE"): 7172 this.set("minvalue", self._parse_bitwise()) 7173 if self._match_text_seq("MAXVALUE"): 7174 this.set("maxvalue", self._parse_bitwise()) 7175 7176 if self._match_text_seq("CYCLE"): 7177 this.set("cycle", True) 7178 elif self._match_text_seq("NO", "CYCLE"): 7179 this.set("cycle", False) 7180 7181 if not identity: 7182 this.set("expression", self._parse_range()) 7183 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 7184 args = self._parse_csv(self._parse_bitwise) 7185 this.set("start", seq_get(args, 0)) 7186 this.set("increment", seq_get(args, 1)) 7187 7188 self._match_r_paren() 7189 7190 return this 7191 7192 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 7193 self._match_text_seq("LENGTH") 7194 return self.expression(exp.InlineLengthColumnConstraint(this=self._parse_bitwise())) 7195 7196 def _parse_not_constraint(self) -> exp.Expr | None: 7197 if self._match_text_seq("NULL"): 7198 return self.expression(exp.NotNullColumnConstraint()) 7199 if self._match_text_seq("CASESPECIFIC"): 7200 return self.expression(exp.CaseSpecificColumnConstraint(not_=True)) 7201 if self._match_text_seq("FOR", "REPLICATION"): 7202 return self.expression(exp.NotForReplicationColumnConstraint()) 7203 7204 # Unconsume the `NOT` token 7205 self._retreat(self._index - 1) 7206 return None 7207 7208 def _parse_column_constraint(self) -> exp.Expr | None: 7209 this = self._parse_id_var() if self._match(TokenType.CONSTRAINT) else None 7210 7211 procedure_option_follows = ( 7212 self._match(TokenType.WITH, advance=False) 7213 and self._next 7214 and self._next.text.upper() in self.PROCEDURE_OPTIONS 7215 ) 7216 7217 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 7218 constraint = self.CONSTRAINT_PARSERS[self._prev.text.upper()](self) 7219 if not constraint: 7220 self._retreat(self._index - 1) 7221 return None 7222 7223 return self.expression(exp.ColumnConstraint(this=this, kind=constraint)) 7224 7225 return this 7226 7227 def _parse_constraint(self) -> exp.Expr | None: 7228 if not self._match(TokenType.CONSTRAINT): 7229 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 7230 7231 return self.expression( 7232 exp.Constraint(this=self._parse_id_var(), expressions=self._parse_unnamed_constraints()) 7233 ) 7234 7235 def _parse_unnamed_constraints(self) -> list[exp.Expr]: 7236 constraints = [] 7237 while True: 7238 constraint = self._parse_unnamed_constraint() or self._parse_function() 7239 if not constraint: 7240 break 7241 constraints.append(constraint) 7242 7243 return constraints 7244 7245 def _parse_unnamed_constraint( 7246 self, constraints: t.Collection[str] | None = None 7247 ) -> exp.Expr | None: 7248 index = self._index 7249 7250 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 7251 constraints or self.CONSTRAINT_PARSERS 7252 ): 7253 return None 7254 7255 constraint_key = self._prev.text.upper() 7256 if constraint_key not in self.CONSTRAINT_PARSERS: 7257 self.raise_error(f"No parser found for schema constraint {constraint_key}.") 7258 7259 result = self.CONSTRAINT_PARSERS[constraint_key](self) 7260 if not result: 7261 self._retreat(index) 7262 7263 return result 7264 7265 def _parse_unique_key(self) -> exp.Expr | None: 7266 if ( 7267 self._curr 7268 and self._curr.token_type != TokenType.IDENTIFIER 7269 and self._curr.text.upper() in self.CONSTRAINT_PARSERS 7270 ): 7271 return None 7272 return self._parse_id_var(any_token=False) 7273 7274 def _parse_unique(self) -> exp.UniqueColumnConstraint: 7275 self._match_texts(("KEY", "INDEX")) 7276 return self.expression( 7277 exp.UniqueColumnConstraint( 7278 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 7279 this=self._parse_schema(self._parse_unique_key()), 7280 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 7281 on_conflict=self._parse_on_conflict(), 7282 options=self._parse_key_constraint_options(), 7283 ) 7284 ) 7285 7286 def _parse_key_constraint_options(self) -> list[str]: 7287 options = [] 7288 while True: 7289 if not self._curr: 7290 break 7291 7292 if self._match(TokenType.ON): 7293 action = None 7294 on = self._advance_any() and self._prev.text 7295 7296 if self._match_text_seq("NO", "ACTION"): 7297 action = "NO ACTION" 7298 elif self._match_text_seq("CASCADE"): 7299 action = "CASCADE" 7300 elif self._match_text_seq("RESTRICT"): 7301 action = "RESTRICT" 7302 elif self._match_pair(TokenType.SET, TokenType.NULL): 7303 action = "SET NULL" 7304 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 7305 action = "SET DEFAULT" 7306 else: 7307 self.raise_error("Invalid key constraint") 7308 7309 options.append(f"ON {on} {action}") 7310 else: 7311 var = self._parse_var_from_options( 7312 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 7313 ) 7314 if not var: 7315 break 7316 options.append(var.name) 7317 7318 return options 7319 7320 def _parse_references(self, match: bool = True) -> exp.Reference | None: 7321 if match and not self._match(TokenType.REFERENCES): 7322 return None 7323 7324 expressions: list | None = None 7325 this = self._parse_table(schema=True) 7326 options = self._parse_key_constraint_options() 7327 return self.expression(exp.Reference(this=this, expressions=expressions, options=options)) 7328 7329 def _parse_foreign_key(self) -> exp.ForeignKey: 7330 expressions = ( 7331 self._parse_wrapped_id_vars() 7332 if not self._match(TokenType.REFERENCES, advance=False) 7333 else None 7334 ) 7335 reference = self._parse_references() 7336 on_options = {} 7337 7338 while self._match(TokenType.ON): 7339 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 7340 self.raise_error("Expected DELETE or UPDATE") 7341 7342 kind = self._prev.text.lower() 7343 7344 if self._match_text_seq("NO", "ACTION"): 7345 action = "NO ACTION" 7346 elif self._match(TokenType.SET): 7347 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 7348 action = "SET " + self._prev.text.upper() 7349 else: 7350 self._advance() 7351 action = self._prev.text.upper() 7352 7353 on_options[kind] = action 7354 7355 return self.expression( 7356 exp.ForeignKey( 7357 expressions=expressions, 7358 reference=reference, 7359 options=self._parse_key_constraint_options(), 7360 **on_options, 7361 ) 7362 ) 7363 7364 def _parse_primary_key_part(self) -> exp.Expr | None: 7365 return self._parse_field() 7366 7367 def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint | None: 7368 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 7369 self._retreat(self._index - 1) 7370 return None 7371 7372 id_vars = self._parse_wrapped_id_vars() 7373 return self.expression( 7374 exp.PeriodForSystemTimeConstraint( 7375 this=seq_get(id_vars, 0), expression=seq_get(id_vars, 1) 7376 ) 7377 ) 7378 7379 def _parse_primary_key( 7380 self, 7381 wrapped_optional: bool = False, 7382 in_props: bool = False, 7383 named_primary_key: bool = False, 7384 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 7385 desc = ( 7386 self._prev.token_type == TokenType.DESC 7387 if self._match_set((TokenType.ASC, TokenType.DESC)) 7388 else None 7389 ) 7390 7391 this = None 7392 if ( 7393 named_primary_key 7394 and self._curr.text.upper() not in self.CONSTRAINT_PARSERS 7395 and self._next 7396 and self._next.token_type == TokenType.L_PAREN 7397 ): 7398 this = self._parse_id_var() 7399 7400 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 7401 return self.expression( 7402 exp.PrimaryKeyColumnConstraint( 7403 desc=desc, options=self._parse_key_constraint_options() 7404 ) 7405 ) 7406 7407 expressions = self._parse_wrapped_csv( 7408 self._parse_primary_key_part, optional=wrapped_optional 7409 ) 7410 7411 return self.expression( 7412 exp.PrimaryKey( 7413 this=this, 7414 expressions=expressions, 7415 include=self._parse_index_params(), 7416 options=self._parse_key_constraint_options(), 7417 ) 7418 ) 7419 7420 def _parse_bracket_key_value(self, is_map: bool = False) -> exp.Expr | None: 7421 return self._parse_slice(self._parse_alias(self._parse_disjunction(), explicit=True)) 7422 7423 def _parse_odbc_datetime_literal(self) -> exp.Expr: 7424 """ 7425 Parses a datetime column in ODBC format. We parse the column into the corresponding 7426 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 7427 same as we did for `DATE('yyyy-mm-dd')`. 7428 7429 Reference: 7430 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 7431 """ 7432 self._match(TokenType.VAR) 7433 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 7434 expression = self.expression(exp_class(this=self._parse_string())) 7435 if not self._match(TokenType.R_BRACE): 7436 self.raise_error("Expected }") 7437 return expression 7438 7439 def _parse_bracket(self, this: exp.Expr | None = None) -> exp.Expr | None: 7440 if not self._match_set(self.BRACKETS): 7441 return this 7442 7443 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 7444 map_token = seq_get(self._tokens, self._index - 2) 7445 parse_map = map_token is not None and map_token.text.upper() == "MAP" 7446 else: 7447 parse_map = False 7448 7449 bracket_kind = self._prev.token_type 7450 if ( 7451 bracket_kind == TokenType.L_BRACE 7452 and self._curr 7453 and self._curr.token_type == TokenType.VAR 7454 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 7455 ): 7456 return self._parse_odbc_datetime_literal() 7457 7458 expressions = self._parse_csv( 7459 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 7460 ) 7461 7462 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 7463 self.raise_error("Expected ]") 7464 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 7465 self.raise_error("Expected }") 7466 7467 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 7468 if bracket_kind == TokenType.L_BRACE: 7469 this = self.expression( 7470 exp.Struct( 7471 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map) 7472 ) 7473 ) 7474 elif not this: 7475 this = build_array_constructor( 7476 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 7477 ) 7478 else: 7479 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 7480 if constructor_type: 7481 return build_array_constructor( 7482 constructor_type, 7483 args=expressions, 7484 bracket_kind=bracket_kind, 7485 dialect=self.dialect, 7486 ) 7487 7488 expressions = apply_index_offset( 7489 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 7490 ) 7491 this = self.expression( 7492 exp.Bracket(this=this, expressions=expressions), comments=this.pop_comments() 7493 ) 7494 7495 self._add_comments(this) 7496 return self._parse_bracket(this) 7497 7498 def _parse_slice(self, this: exp.Expr | None) -> exp.Expr | None: 7499 if not self._match(TokenType.COLON): 7500 return this 7501 7502 if self._match_pair(TokenType.DASH, TokenType.COLON, advance=False): 7503 self._advance() 7504 end: exp.Expr | None = -exp.Literal.number("1") 7505 else: 7506 end = self._parse_assignment() 7507 step = self._parse_unary() if self._match(TokenType.COLON) else None 7508 return self.expression(exp.Slice(this=this, expression=end, step=step)) 7509 7510 def _parse_case(self) -> exp.Expr | None: 7511 if self._match(TokenType.DOT, advance=False): 7512 # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake 7513 self._retreat(self._index - 1) 7514 return None 7515 7516 ifs = [] 7517 default = None 7518 7519 comments = self._prev_comments 7520 expression = self._parse_disjunction() 7521 7522 while self._match(TokenType.WHEN): 7523 this = self._parse_disjunction() 7524 self._match(TokenType.THEN) 7525 then = self._parse_disjunction() 7526 ifs.append(self.expression(exp.If(this=this, true=then))) 7527 7528 if self._match(TokenType.ELSE): 7529 default = self._parse_disjunction() 7530 7531 if not self._match(TokenType.END): 7532 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 7533 default = exp.column("interval") 7534 else: 7535 self.raise_error("Expected END after CASE", self._prev) 7536 7537 return self.expression( 7538 exp.Case(this=expression, ifs=ifs, default=default), comments=comments 7539 ) 7540 7541 def _parse_if(self) -> exp.Expr | None: 7542 if self._match(TokenType.L_PAREN): 7543 args = self._parse_csv( 7544 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 7545 ) 7546 this = self.validate_expression(exp.If.from_arg_list(args), args) 7547 self._match_r_paren() 7548 else: 7549 index = self._index - 1 7550 7551 if self.NO_PAREN_IF_COMMANDS and index == 0: 7552 return self._parse_as_command(self._prev) 7553 7554 condition = self._parse_disjunction() 7555 7556 if not condition: 7557 self._retreat(index) 7558 return None 7559 7560 self._match(TokenType.THEN) 7561 true = self._parse_disjunction() 7562 false = self._parse_disjunction() if self._match(TokenType.ELSE) else None 7563 self._match(TokenType.END) 7564 this = self.expression(exp.If(this=condition, true=true, false=false)) 7565 7566 return this 7567 7568 def _parse_next_value_for(self) -> exp.Expr | None: 7569 if not self._match_text_seq("VALUE", "FOR"): 7570 self._retreat(self._index - 1) 7571 return None 7572 7573 return self.expression( 7574 exp.NextValueFor( 7575 this=self._parse_column(), 7576 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 7577 ) 7578 ) 7579 7580 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 7581 this = self._parse_function() or self._parse_var_or_string(upper=True) 7582 7583 if self._match(TokenType.FROM): 7584 return self.expression(exp.Extract(this=this, expression=self._parse_bitwise())) 7585 7586 if not self._match(TokenType.COMMA): 7587 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 7588 7589 return self.expression(exp.Extract(this=this, expression=self._parse_bitwise())) 7590 7591 def _parse_gap_fill(self) -> exp.GapFill: 7592 self._match(TokenType.TABLE) 7593 this = self._parse_table() 7594 7595 self._match(TokenType.COMMA) 7596 args = [this, *self._parse_csv(self._parse_lambda)] 7597 7598 gap_fill = exp.GapFill.from_arg_list(args) 7599 return self.validate_expression(gap_fill, args) 7600 7601 def _parse_char(self) -> exp.Chr: 7602 return self.expression( 7603 exp.Chr( 7604 expressions=self._parse_csv(self._parse_assignment), 7605 charset=self._match(TokenType.USING) and self._parse_var(), 7606 ) 7607 ) 7608 7609 def _parse_cast(self, strict: bool, safe: bool | None = None) -> exp.Expr: 7610 this = self._parse_assignment() 7611 7612 if not self._match(TokenType.ALIAS): 7613 if self._match(TokenType.COMMA): 7614 return self.expression(exp.CastToStrType(this=this, to=self._parse_string())) 7615 7616 self.raise_error("Expected AS after CAST") 7617 7618 fmt = None 7619 to = self._parse_types() 7620 7621 default = None 7622 if self._match(TokenType.DEFAULT): 7623 default = self._parse_bitwise() 7624 self._match_text_seq("ON", "CONVERSION", "ERROR") 7625 7626 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 7627 fmt_string = self._parse_wrapped(self._parse_string, optional=True) 7628 fmt = self._parse_at_time_zone(fmt_string) 7629 7630 if not to: 7631 to = exp.DType.UNKNOWN.into_expr() 7632 if to.this in exp.DataType.TEMPORAL_TYPES: 7633 this = self.expression( 7634 (exp.StrToDate if to.this == exp.DType.DATE else exp.StrToTime)( 7635 this=this, 7636 format=exp.Literal.string( 7637 format_time( 7638 fmt_string.this if fmt_string else "", 7639 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 7640 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 7641 ) 7642 ), 7643 safe=safe, 7644 ) 7645 ) 7646 7647 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 7648 this.set("zone", fmt.args["zone"]) 7649 return this 7650 elif not to: 7651 self.raise_error("Expected TYPE after CAST") 7652 elif isinstance(to, exp.Identifier): 7653 to = exp.DataType.build(to.name, dialect=self.dialect, udt=True) 7654 elif to.this == exp.DType.CHAR and self._match(TokenType.CHARACTER_SET): 7655 to = exp.DType.CHARACTER_SET.into_expr(kind=self._parse_var_or_string()) 7656 7657 return self.build_cast( 7658 strict=strict, 7659 this=this, 7660 to=to, 7661 format=fmt, 7662 safe=safe, 7663 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 7664 default=default, 7665 ) 7666 7667 def _parse_string_agg(self) -> exp.GroupConcat: 7668 if self._match(TokenType.DISTINCT): 7669 args: list[exp.Expr | None] = [ 7670 self.expression(exp.Distinct(expressions=[self._parse_disjunction()])) 7671 ] 7672 if self._match(TokenType.COMMA): 7673 args.extend(self._parse_csv(self._parse_disjunction)) 7674 else: 7675 args = self._parse_csv(self._parse_disjunction) # type: ignore 7676 7677 if self._match_text_seq("ON", "OVERFLOW"): 7678 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 7679 if self._match_text_seq("ERROR"): 7680 on_overflow: exp.Expr | None = exp.var("ERROR") 7681 else: 7682 self._match_text_seq("TRUNCATE") 7683 on_overflow = self.expression( 7684 exp.OverflowTruncateBehavior( 7685 this=self._parse_string(), 7686 with_count=( 7687 self._match_text_seq("WITH", "COUNT") 7688 or not self._match_text_seq("WITHOUT", "COUNT") 7689 ), 7690 ) 7691 ) 7692 else: 7693 on_overflow = None 7694 7695 index = self._index 7696 if not self._match(TokenType.R_PAREN) and args: 7697 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 7698 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 7699 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 7700 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 7701 return self.expression(exp.GroupConcat(this=args[0], separator=seq_get(args, 1))) 7702 7703 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 7704 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 7705 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 7706 if not self._match_text_seq("WITHIN", "GROUP"): 7707 self._retreat(index) 7708 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 7709 7710 # The corresponding match_r_paren will be called in parse_function (caller) 7711 self._match_l_paren() 7712 7713 return self.expression( 7714 exp.GroupConcat( 7715 this=self._parse_order(this=seq_get(args, 0)), 7716 separator=seq_get(args, 1), 7717 on_overflow=on_overflow, 7718 ) 7719 ) 7720 7721 def _parse_convert(self, strict: bool, safe: bool | None = None) -> exp.Expr | None: 7722 this = self._parse_bitwise() 7723 7724 if self._match(TokenType.USING): 7725 to: exp.Expr | None = exp.DType.CHARACTER_SET.into_expr( 7726 kind=self._parse_var(tokens={TokenType.BINARY}) 7727 ) 7728 elif self._match(TokenType.COMMA): 7729 to = self._parse_types() 7730 else: 7731 to = None 7732 7733 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 7734 7735 def _parse_xml_element(self) -> exp.XMLElement: 7736 if self._match_text_seq("EVALNAME"): 7737 evalname = True 7738 this = self._parse_bitwise() 7739 else: 7740 evalname = None 7741 self._match_text_seq("NAME") 7742 this = self._parse_id_var() 7743 7744 return self.expression( 7745 exp.XMLElement( 7746 this=this, 7747 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_bitwise), 7748 evalname=evalname, 7749 ) 7750 ) 7751 7752 def _parse_xml_table(self) -> exp.XMLTable: 7753 namespaces = None 7754 passing = None 7755 columns = None 7756 7757 if self._match_text_seq("XMLNAMESPACES", "("): 7758 namespaces = self._parse_xml_namespace() 7759 self._match_text_seq(")", ",") 7760 7761 this = self._parse_string() 7762 7763 if self._match_text_seq("PASSING"): 7764 # The BY VALUE keywords are optional and are provided for semantic clarity 7765 self._match_text_seq("BY", "VALUE") 7766 passing = self._parse_csv(self._parse_column) 7767 7768 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 7769 7770 if self._match_text_seq("COLUMNS"): 7771 columns = self._parse_csv(self._parse_field_def) 7772 7773 return self.expression( 7774 exp.XMLTable( 7775 this=this, namespaces=namespaces, passing=passing, columns=columns, by_ref=by_ref 7776 ) 7777 ) 7778 7779 def _parse_xml_namespace(self) -> list[exp.XMLNamespace]: 7780 namespaces = [] 7781 7782 while True: 7783 if self._match(TokenType.DEFAULT): 7784 uri = self._parse_string() 7785 else: 7786 uri = self._parse_alias(self._parse_string()) 7787 namespaces.append(self.expression(exp.XMLNamespace(this=uri))) 7788 if not self._match(TokenType.COMMA): 7789 break 7790 7791 return namespaces 7792 7793 def _parse_decode(self) -> exp.Decode | exp.DecodeCase | None: 7794 args = self._parse_csv(self._parse_disjunction) 7795 7796 if len(args) < 3: 7797 return self.expression(exp.Decode(this=seq_get(args, 0), charset=seq_get(args, 1))) 7798 7799 return self.expression(exp.DecodeCase(expressions=args)) 7800 7801 def _parse_json_key_value(self) -> exp.JSONKeyValue | None: 7802 self._match_text_seq("KEY") 7803 key = self._parse_column() 7804 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 7805 self._match_text_seq("VALUE") 7806 value = self._parse_bitwise() 7807 7808 if not key and not value: 7809 return None 7810 return self.expression(exp.JSONKeyValue(this=key, expression=value)) 7811 7812 def _parse_format_json(self, this: exp.Expr | None) -> exp.Expr | None: 7813 if not this or not self._match_text_seq("FORMAT", "JSON"): 7814 return this 7815 7816 return self.expression(exp.FormatJson(this=this)) 7817 7818 def _parse_on_condition(self) -> exp.OnCondition | None: 7819 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 7820 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 7821 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 7822 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 7823 else: 7824 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 7825 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 7826 7827 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 7828 7829 if not empty and not error and not null: 7830 return None 7831 7832 return self.expression(exp.OnCondition(empty=empty, error=error, null=null)) 7833 7834 def _parse_on_handling(self, on: str, *values: str) -> str | None | exp.Expr | None: 7835 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 7836 for value in values: 7837 if self._match_text_seq(value, "ON", on): 7838 return f"{value} ON {on}" 7839 7840 index = self._index 7841 if self._match(TokenType.DEFAULT): 7842 default_value = self._parse_bitwise() 7843 if self._match_text_seq("ON", on): 7844 return default_value 7845 7846 self._retreat(index) 7847 7848 return None 7849 7850 @t.overload 7851 def _parse_json_object(self, agg: t.Literal[False]) -> exp.JSONObject: ... 7852 7853 @t.overload 7854 def _parse_json_object(self, agg: t.Literal[True]) -> exp.JSONObjectAgg: ... 7855 7856 def _parse_json_object(self, agg=False): 7857 star = self._parse_star() 7858 expressions = ( 7859 [star] 7860 if star 7861 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 7862 ) 7863 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 7864 7865 unique_keys = None 7866 if self._match_text_seq("WITH", "UNIQUE"): 7867 unique_keys = True 7868 elif self._match_text_seq("WITHOUT", "UNIQUE"): 7869 unique_keys = False 7870 7871 self._match_text_seq("KEYS") 7872 7873 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 7874 self._parse_type() 7875 ) 7876 encoding = self._match_text_seq("ENCODING") and self._parse_var() 7877 7878 return self.expression( 7879 (exp.JSONObjectAgg if agg else exp.JSONObject)( 7880 expressions=expressions, 7881 null_handling=null_handling, 7882 unique_keys=unique_keys, 7883 return_type=return_type, 7884 encoding=encoding, 7885 ) 7886 ) 7887 7888 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 7889 def _parse_json_column_def(self) -> exp.JSONColumnDef: 7890 if not self._match_text_seq("NESTED"): 7891 this = self._parse_id_var() 7892 ordinality = self._match_pair(TokenType.FOR, TokenType.ORDINALITY) 7893 kind = self._parse_types(allow_identifiers=False) 7894 nested = None 7895 else: 7896 this = None 7897 ordinality = None 7898 kind = None 7899 nested = True 7900 7901 path = self._match_text_seq("PATH") and self._parse_string() 7902 nested_schema = nested and self._parse_json_schema() 7903 7904 return self.expression( 7905 exp.JSONColumnDef( 7906 this=this, kind=kind, path=path, nested_schema=nested_schema, ordinality=ordinality 7907 ) 7908 ) 7909 7910 def _parse_json_schema(self) -> exp.JSONSchema: 7911 self._match_text_seq("COLUMNS") 7912 return self.expression( 7913 exp.JSONSchema( 7914 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True) 7915 ) 7916 ) 7917 7918 def _parse_json_table(self) -> exp.JSONTable: 7919 this = self._parse_format_json(self._parse_bitwise()) 7920 path = self._match(TokenType.COMMA) and self._parse_string() 7921 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 7922 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 7923 schema = self._parse_json_schema() 7924 7925 return exp.JSONTable( 7926 this=this, 7927 schema=schema, 7928 path=path, 7929 error_handling=error_handling, 7930 empty_handling=empty_handling, 7931 ) 7932 7933 def _parse_match_against(self) -> exp.MatchAgainst: 7934 if self._match_text_seq("TABLE"): 7935 # parse SingleStore MATCH(TABLE ...) syntax 7936 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 7937 expressions = [] 7938 table = self._parse_table() 7939 if table: 7940 expressions = [table] 7941 else: 7942 expressions = self._parse_csv(self._parse_column) 7943 7944 self._match_text_seq(")", "AGAINST", "(") 7945 7946 this = self._parse_string() 7947 7948 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 7949 modifier = "IN NATURAL LANGUAGE MODE" 7950 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 7951 modifier = f"{modifier} WITH QUERY EXPANSION" 7952 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 7953 modifier = "IN BOOLEAN MODE" 7954 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 7955 modifier = "WITH QUERY EXPANSION" 7956 else: 7957 modifier = None 7958 7959 return self.expression( 7960 exp.MatchAgainst(this=this, expressions=expressions, modifier=modifier) 7961 ) 7962 7963 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 7964 def _parse_open_json(self) -> exp.OpenJSON: 7965 this = self._parse_bitwise() 7966 path = self._match(TokenType.COMMA) and self._parse_string() 7967 7968 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 7969 this = self._parse_field(any_token=True) 7970 kind = self._parse_types() 7971 path = self._parse_string() 7972 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 7973 7974 return self.expression( 7975 exp.OpenJSONColumnDef(this=this, kind=kind, path=path, as_json=as_json) 7976 ) 7977 7978 expressions = None 7979 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 7980 self._match_l_paren() 7981 expressions = self._parse_csv(_parse_open_json_column_def) 7982 7983 return self.expression(exp.OpenJSON(this=this, path=path, expressions=expressions)) 7984 7985 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 7986 args = self._parse_csv(self._parse_bitwise) 7987 7988 if self._match(TokenType.IN): 7989 return self.expression( 7990 exp.StrPosition(this=self._parse_bitwise(), substr=seq_get(args, 0)) 7991 ) 7992 7993 if haystack_first: 7994 haystack = seq_get(args, 0) 7995 needle = seq_get(args, 1) 7996 else: 7997 haystack = seq_get(args, 1) 7998 needle = seq_get(args, 0) 7999 8000 return self.expression( 8001 exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2)) 8002 ) 8003 8004 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 8005 args = self._parse_csv(self._parse_table) 8006 return exp.JoinHint(this=func_name.upper(), expressions=args) 8007 8008 def _parse_substring(self) -> exp.Substring: 8009 # Postgres supports the form: substring(string [from int] [for int]) 8010 # (despite being undocumented, the reverse order also works) 8011 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 8012 8013 args = t.cast(list[t.Optional[exp.Expr]], self._parse_csv(self._parse_bitwise)) 8014 8015 start, length = None, None 8016 8017 while self._curr: 8018 if self._match(TokenType.FROM): 8019 start = self._parse_bitwise() 8020 elif self._match(TokenType.FOR): 8021 if not start: 8022 start = exp.Literal.number(1) 8023 length = self._parse_bitwise() 8024 else: 8025 break 8026 8027 if start: 8028 args.append(start) 8029 if length: 8030 args.append(length) 8031 8032 return self.validate_expression(exp.Substring.from_arg_list(args), args) 8033 8034 def _parse_trim(self) -> exp.Trim: 8035 # https://www.w3resource.com/sql/character-functions/trim.php 8036 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 8037 8038 position = None 8039 collation = None 8040 expression = None 8041 8042 if self._match_texts(self.TRIM_TYPES): 8043 position = self._prev.text.upper() 8044 8045 this = self._parse_bitwise() 8046 if self._match_set((TokenType.FROM, TokenType.COMMA)): 8047 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 8048 expression = self._parse_bitwise() 8049 8050 if invert_order: 8051 this, expression = expression, this 8052 8053 if self._match(TokenType.COLLATE): 8054 collation = self._parse_bitwise() 8055 8056 return self.expression( 8057 exp.Trim(this=this, position=position, expression=expression, collation=collation) 8058 ) 8059 8060 def _parse_window_clause(self) -> list[exp.Expr] | None: 8061 return self._parse_csv(self._parse_named_window) if self._match(TokenType.WINDOW) else None 8062 8063 def _parse_named_window(self) -> exp.Expr | None: 8064 return self._parse_window(self._parse_id_var(), alias=True) 8065 8066 def _parse_respect_or_ignore_nulls(self, this: exp.Expr | None) -> exp.Expr | None: 8067 if self._curr.token_type == TokenType.VAR: 8068 if self._match_text_seq("IGNORE", "NULLS"): 8069 return self.expression(exp.IgnoreNulls(this=this)) 8070 if self._match_text_seq("RESPECT", "NULLS"): 8071 return self.expression(exp.RespectNulls(this=this)) 8072 return this 8073 8074 def _parse_having_max(self, this: exp.Expr | None) -> exp.Expr | None: 8075 if self._match(TokenType.HAVING): 8076 self._match_texts(("MAX", "MIN")) 8077 max = self._prev.text.upper() != "MIN" 8078 return self.expression( 8079 exp.HavingMax(this=this, expression=self._parse_column(), max=max) 8080 ) 8081 8082 return this 8083 8084 def _parse_window(self, this: exp.Expr | None, alias: bool = False) -> exp.Expr | None: 8085 func = this 8086 comments = func.comments if isinstance(func, exp.Expr) else None 8087 8088 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 8089 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 8090 if self._match_text_seq("WITHIN", "GROUP"): 8091 order = self._parse_wrapped(self._parse_order) 8092 this = self.expression(exp.WithinGroup(this=this, expression=order)) 8093 8094 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 8095 self._match(TokenType.WHERE) 8096 this = self.expression( 8097 exp.Filter(this=this, expression=self._parse_where(skip_where_token=True)) 8098 ) 8099 self._match_r_paren() 8100 8101 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 8102 # Some dialects choose to implement and some do not. 8103 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 8104 8105 # There is some code above in _parse_lambda that handles 8106 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 8107 8108 # The below changes handle 8109 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 8110 8111 # Oracle allows both formats 8112 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 8113 # and Snowflake chose to do the same for familiarity 8114 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 8115 if isinstance(this, exp.AggFunc): 8116 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 8117 8118 if ignore_respect and ignore_respect is not this: 8119 ignore_respect.replace(ignore_respect.this) 8120 this = self.expression(ignore_respect.__class__(this=this)) 8121 8122 this = self._parse_respect_or_ignore_nulls(this) 8123 8124 # bigquery select from window x AS (partition by ...) 8125 if alias: 8126 over = None 8127 self._match(TokenType.ALIAS) 8128 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 8129 return this 8130 else: 8131 over = self._prev.text.upper() 8132 8133 if comments and isinstance(func, exp.Expr): 8134 func.pop_comments() 8135 8136 if not self._match(TokenType.L_PAREN): 8137 return self.expression( 8138 exp.Window(this=this, alias=self._parse_id_var(False), over=over), comments=comments 8139 ) 8140 8141 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 8142 8143 first: bool | None = True if self._match(TokenType.FIRST) else None 8144 if self._match_text_seq("LAST"): 8145 first = False 8146 8147 partition, order = self._parse_partition_and_order() 8148 kind = ( 8149 self._match_set((TokenType.ROWS, TokenType.RANGE)) or self._match_text_seq("GROUPS") 8150 ) and self._prev.text 8151 8152 if kind: 8153 self._match(TokenType.BETWEEN) 8154 start = self._parse_window_spec() 8155 8156 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 8157 exclude = ( 8158 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 8159 if self._match_text_seq("EXCLUDE") 8160 else None 8161 ) 8162 8163 spec = self.expression( 8164 exp.WindowSpec( 8165 kind=kind, 8166 start=start["value"], 8167 start_side=start["side"], 8168 end=end.get("value"), 8169 end_side=end.get("side"), 8170 exclude=exclude, 8171 ) 8172 ) 8173 else: 8174 spec = None 8175 8176 self._match_r_paren() 8177 8178 window = self.expression( 8179 exp.Window( 8180 this=this, 8181 partition_by=partition, 8182 order=order, 8183 spec=spec, 8184 alias=window_alias, 8185 over=over, 8186 first=first, 8187 ), 8188 comments=comments, 8189 ) 8190 8191 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 8192 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 8193 return self._parse_window(window, alias=alias) 8194 8195 return window 8196 8197 def _parse_partition_and_order( 8198 self, 8199 ) -> tuple[list[exp.Expr], exp.Expr | None]: 8200 return self._parse_partition_by(), self._parse_order() 8201 8202 def _parse_window_spec(self) -> dict[str, str | exp.Expr | None]: 8203 self._match(TokenType.BETWEEN) 8204 8205 return { 8206 "value": ( 8207 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 8208 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 8209 or self._parse_bitwise() 8210 ), 8211 "side": self._prev.text if self._match_texts(self.WINDOW_SIDES) else None, 8212 } 8213 8214 def _parse_alias(self, this: exp.Expr | None, explicit: bool = False) -> exp.Expr | None: 8215 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 8216 # so this section tries to parse the clause version and if it fails, it treats the token 8217 # as an identifier (alias) 8218 if self._can_parse_limit_or_offset(): 8219 return this 8220 8221 any_token = self._match(TokenType.ALIAS) 8222 comments = self._prev_comments 8223 8224 if explicit and not any_token: 8225 return this 8226 8227 if self._match(TokenType.L_PAREN): 8228 aliases = self.expression( 8229 exp.Aliases( 8230 this=this, expressions=self._parse_csv(lambda: self._parse_id_var(any_token)) 8231 ), 8232 comments=comments, 8233 ) 8234 self._match_r_paren(aliases) 8235 return aliases 8236 8237 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 8238 self.STRING_ALIASES and self._parse_string_as_identifier() 8239 ) 8240 8241 if alias: 8242 comments.extend(alias.pop_comments()) 8243 this = self.expression(exp.Alias(this=this, alias=alias), comments=comments) 8244 column = this.this 8245 8246 # Moves the comment next to the alias in `expr /* comment */ AS alias` 8247 if not this.comments and column and column.comments: 8248 this.comments = column.pop_comments() 8249 8250 return this 8251 8252 def _parse_id_var( 8253 self, 8254 any_token: bool = True, 8255 tokens: t.Collection[TokenType] | None = None, 8256 ) -> exp.Expr | None: 8257 expression = self._parse_identifier() 8258 if not expression and ( 8259 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 8260 ): 8261 quoted = self._prev.token_type == TokenType.STRING 8262 expression = self._identifier_expression(quoted=quoted) 8263 8264 return expression 8265 8266 def _parse_string(self) -> exp.Expr | None: 8267 if self._match_set(self.STRING_PARSERS): 8268 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 8269 return self._parse_placeholder() 8270 8271 def _parse_string_as_identifier(self) -> exp.Identifier | None: 8272 if not self._match(TokenType.STRING): 8273 return None 8274 output = exp.to_identifier(self._prev.text, quoted=True) 8275 output.update_positions(self._prev) 8276 return output 8277 8278 def _parse_number(self) -> exp.Expr | None: 8279 if self._match_set(self.NUMERIC_PARSERS): 8280 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 8281 return self._parse_placeholder() 8282 8283 def _parse_identifier(self) -> exp.Expr | None: 8284 if self._match(TokenType.IDENTIFIER): 8285 return self._identifier_expression(quoted=True) 8286 return self._parse_placeholder() 8287 8288 def _parse_var( 8289 self, 8290 any_token: bool = False, 8291 tokens: t.Collection[TokenType] | None = None, 8292 upper: bool = False, 8293 ) -> exp.Expr | None: 8294 if ( 8295 (any_token and self._advance_any()) 8296 or self._match(TokenType.VAR) 8297 or (self._match_set(tokens) if tokens else False) 8298 ): 8299 return self.expression( 8300 exp.Var(this=self._prev.text.upper() if upper else self._prev.text) 8301 ) 8302 return self._parse_placeholder() 8303 8304 def _advance_any(self, ignore_reserved: bool = False) -> Token | None: 8305 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 8306 self._advance() 8307 return self._prev 8308 return None 8309 8310 def _parse_var_or_string(self, upper: bool = False) -> exp.Expr | None: 8311 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 8312 8313 def _parse_primary_or_var(self) -> exp.Expr | None: 8314 return self._parse_primary() or self._parse_var(any_token=True) 8315 8316 def _parse_null(self) -> exp.Expr | None: 8317 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 8318 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 8319 return self._parse_placeholder() 8320 8321 def _parse_boolean(self) -> exp.Expr | None: 8322 if self._match(TokenType.TRUE): 8323 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 8324 if self._match(TokenType.FALSE): 8325 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 8326 return self._parse_placeholder() 8327 8328 def _parse_star(self) -> exp.Expr | None: 8329 if self._match(TokenType.STAR): 8330 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 8331 return self._parse_placeholder() 8332 8333 def _parse_parameter(self) -> exp.Parameter: 8334 this = self._parse_identifier() or self._parse_primary_or_var() 8335 return self.expression(exp.Parameter(this=this)) 8336 8337 def _parse_placeholder(self) -> exp.Expr | None: 8338 if self._match_set(self.PLACEHOLDER_PARSERS): 8339 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 8340 if placeholder: 8341 return placeholder 8342 self._advance(-1) 8343 return None 8344 8345 def _parse_star_op(self, *keywords: str) -> list[exp.Expr] | None: 8346 if not self._match_texts(keywords): 8347 return None 8348 if self._match(TokenType.L_PAREN, advance=False): 8349 return self._parse_wrapped_csv(self._parse_expression) 8350 8351 expression = self._parse_alias(self._parse_disjunction(), explicit=True) 8352 return [expression] if expression else None 8353 8354 def _parse_csv( 8355 self, parse_method: t.Callable[[], T | None], sep: TokenType = TokenType.COMMA 8356 ) -> list[T]: 8357 parse_result = parse_method() 8358 items = [parse_result] if parse_result is not None else [] 8359 8360 while self._match(sep): 8361 if isinstance(parse_result, exp.Expr): 8362 self._add_comments(parse_result) 8363 parse_result = parse_method() 8364 if parse_result is not None: 8365 items.append(parse_result) 8366 8367 return items 8368 8369 def _parse_wrapped_id_vars(self, optional: bool = False) -> list[exp.Expr]: 8370 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 8371 8372 def _parse_wrapped_csv( 8373 self, 8374 parse_method: t.Callable[[], T | None], 8375 sep: TokenType = TokenType.COMMA, 8376 optional: bool = False, 8377 ) -> list[T]: 8378 return self._parse_wrapped( 8379 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 8380 ) 8381 8382 def _parse_wrapped(self, parse_method: t.Callable[[], T], optional: bool = False) -> T: 8383 wrapped = self._match(TokenType.L_PAREN) 8384 if not wrapped and not optional: 8385 self.raise_error("Expecting (") 8386 parse_result = parse_method() 8387 if wrapped: 8388 self._match_r_paren() 8389 return parse_result 8390 8391 def _parse_expressions(self) -> list[exp.Expr]: 8392 return self._parse_csv(self._parse_expression) 8393 8394 def _parse_select_or_expression(self, alias: bool = False) -> exp.Expr | None: 8395 return ( 8396 self._parse_set_operations( 8397 self._parse_alias(self._parse_assignment(), explicit=True) 8398 if alias 8399 else self._parse_assignment() 8400 ) 8401 or self._parse_select() 8402 ) 8403 8404 def _parse_ddl_select(self) -> exp.Expr | None: 8405 return self._parse_query_modifiers( 8406 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 8407 ) 8408 8409 def _parse_transaction(self) -> exp.Transaction | exp.Command: 8410 this = None 8411 if self._match_texts(self.TRANSACTION_KIND): 8412 this = self._prev.text 8413 8414 self._match_texts(("TRANSACTION", "WORK")) 8415 8416 modes = [] 8417 while True: 8418 mode = [] 8419 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 8420 mode.append(self._prev.text) 8421 8422 if mode: 8423 modes.append(" ".join(mode)) 8424 if not self._match(TokenType.COMMA): 8425 break 8426 8427 return self.expression(exp.Transaction(this=this, modes=modes)) 8428 8429 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 8430 chain = None 8431 savepoint = None 8432 is_rollback = self._prev.token_type == TokenType.ROLLBACK 8433 8434 self._match_texts(("TRANSACTION", "WORK")) 8435 8436 if self._match_text_seq("TO"): 8437 self._match_text_seq("SAVEPOINT") 8438 savepoint = self._parse_id_var() 8439 8440 if self._match(TokenType.AND): 8441 chain = not self._match_text_seq("NO") 8442 self._match_text_seq("CHAIN") 8443 8444 if is_rollback: 8445 return self.expression(exp.Rollback(savepoint=savepoint)) 8446 8447 return self.expression(exp.Commit(chain=chain)) 8448 8449 def _parse_refresh(self) -> exp.Refresh | exp.Command: 8450 if self._match(TokenType.TABLE): 8451 kind = "TABLE" 8452 elif self._match_text_seq("MATERIALIZED", "VIEW"): 8453 kind = "MATERIALIZED VIEW" 8454 else: 8455 kind = "" 8456 8457 this = self._parse_string() or self._parse_table() 8458 if not kind and not isinstance(this, exp.Literal): 8459 return self._parse_as_command(self._prev) 8460 8461 return self.expression(exp.Refresh(this=this, kind=kind)) 8462 8463 def _parse_column_def_with_exists(self): 8464 start = self._index 8465 self._match(TokenType.COLUMN) 8466 8467 exists_column = self._parse_exists(not_=True) 8468 expression = self._parse_field_def() 8469 8470 if not isinstance(expression, exp.ColumnDef): 8471 self._retreat(start) 8472 return None 8473 8474 expression.set("exists", exists_column) 8475 8476 return expression 8477 8478 def _parse_add_column(self) -> exp.ColumnDef | None: 8479 if not self._prev.text.upper() == "ADD": 8480 return None 8481 8482 expression = self._parse_column_def_with_exists() 8483 if not expression: 8484 return None 8485 8486 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 8487 if self._match_texts(("FIRST", "AFTER")): 8488 position = self._prev.text 8489 column_position = self.expression( 8490 exp.ColumnPosition(this=self._parse_column(), position=position) 8491 ) 8492 expression.set("position", column_position) 8493 8494 return expression 8495 8496 def _parse_drop_column(self) -> exp.Drop | exp.Command | None: 8497 drop = self._parse_drop() if self._match(TokenType.DROP) else None 8498 if drop and not isinstance(drop, exp.Command): 8499 drop.set("kind", drop.args.get("kind", "COLUMN")) 8500 return drop 8501 8502 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 8503 def _parse_drop_partition(self, exists: bool | None = None) -> exp.DropPartition: 8504 return self.expression( 8505 exp.DropPartition(expressions=self._parse_csv(self._parse_partition), exists=exists) 8506 ) 8507 8508 def _parse_alter_table_add(self) -> list[exp.Expr]: 8509 def _parse_add_alteration() -> exp.Expr | None: 8510 self._match_text_seq("ADD") 8511 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 8512 return self.expression( 8513 exp.AddConstraint(expressions=self._parse_csv(self._parse_constraint)) 8514 ) 8515 8516 column_def = self._parse_add_column() 8517 if isinstance(column_def, exp.ColumnDef): 8518 return column_def 8519 8520 exists = self._parse_exists(not_=True) 8521 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 8522 return self.expression( 8523 exp.AddPartition( 8524 exists=exists, 8525 this=self._parse_field(any_token=True), 8526 location=self._match_text_seq("LOCATION", advance=False) 8527 and self._parse_property(), 8528 ) 8529 ) 8530 8531 return None 8532 8533 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 8534 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 8535 or self._match_text_seq("COLUMNS") 8536 ): 8537 schema = self._parse_schema() 8538 8539 return ( 8540 ensure_list(schema) 8541 if schema 8542 else self._parse_csv(self._parse_column_def_with_exists) 8543 ) 8544 8545 return self._parse_csv(_parse_add_alteration) 8546 8547 def _parse_alter_table_alter(self) -> exp.Expr | None: 8548 if self._match_texts(self.ALTER_ALTER_PARSERS): 8549 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 8550 8551 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 8552 # keyword after ALTER we default to parsing this statement 8553 self._match(TokenType.COLUMN) 8554 column = self._parse_field(any_token=True) 8555 8556 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 8557 return self.expression(exp.AlterColumn(this=column, drop=True)) 8558 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 8559 return self.expression(exp.AlterColumn(this=column, default=self._parse_disjunction())) 8560 if self._match(TokenType.COMMENT): 8561 return self.expression(exp.AlterColumn(this=column, comment=self._parse_string())) 8562 if self._match_text_seq("DROP", "NOT", "NULL"): 8563 return self.expression(exp.AlterColumn(this=column, drop=True, allow_null=True)) 8564 if self._match_text_seq("SET", "NOT", "NULL"): 8565 return self.expression(exp.AlterColumn(this=column, allow_null=False)) 8566 8567 if self._match_text_seq("SET", "VISIBLE"): 8568 return self.expression(exp.AlterColumn(this=column, visible="VISIBLE")) 8569 if self._match_text_seq("SET", "INVISIBLE"): 8570 return self.expression(exp.AlterColumn(this=column, visible="INVISIBLE")) 8571 8572 self._match_text_seq("SET", "DATA") 8573 self._match_text_seq("TYPE") 8574 return self.expression( 8575 exp.AlterColumn( 8576 this=column, 8577 dtype=self._parse_types(), 8578 collate=self._match(TokenType.COLLATE) and self._parse_term(), 8579 using=self._match(TokenType.USING) and self._parse_disjunction(), 8580 ) 8581 ) 8582 8583 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 8584 if self._match_texts(("ALL", "EVEN", "AUTO")): 8585 return self.expression(exp.AlterDistStyle(this=exp.var(self._prev.text.upper()))) 8586 8587 self._match_text_seq("KEY", "DISTKEY") 8588 return self.expression(exp.AlterDistStyle(this=self._parse_column())) 8589 8590 def _parse_alter_sortkey(self, compound: bool | None = None) -> exp.AlterSortKey: 8591 if compound: 8592 self._match_text_seq("SORTKEY") 8593 8594 if self._match(TokenType.L_PAREN, advance=False): 8595 return self.expression( 8596 exp.AlterSortKey(expressions=self._parse_wrapped_id_vars(), compound=compound) 8597 ) 8598 8599 self._match_texts(("AUTO", "NONE")) 8600 return self.expression( 8601 exp.AlterSortKey(this=exp.var(self._prev.text.upper()), compound=compound) 8602 ) 8603 8604 def _parse_alter_table_drop(self) -> list[exp.Expr]: 8605 index = self._index - 1 8606 8607 partition_exists = self._parse_exists() 8608 if self._match(TokenType.PARTITION, advance=False): 8609 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 8610 8611 self._retreat(index) 8612 return self._parse_csv(self._parse_drop_column) 8613 8614 def _parse_alter_table_rename(self) -> exp.AlterRename | exp.RenameColumn | None: 8615 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 8616 exists = self._parse_exists() 8617 old_column = self._parse_column() 8618 to = self._match_text_seq("TO") 8619 new_column = self._parse_column() 8620 8621 if old_column is None or not to or new_column is None: 8622 return None 8623 8624 return self.expression(exp.RenameColumn(this=old_column, to=new_column, exists=exists)) 8625 8626 self._match_text_seq("TO") 8627 return self.expression(exp.AlterRename(this=self._parse_table(schema=True))) 8628 8629 def _parse_alter_table_set(self) -> exp.AlterSet: 8630 alter_set = self.expression(exp.AlterSet()) 8631 8632 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 8633 "TABLE", "PROPERTIES" 8634 ): 8635 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 8636 elif self._match_text_seq("FILESTREAM_ON", advance=False): 8637 alter_set.set("expressions", [self._parse_assignment()]) 8638 elif self._match_texts(("LOGGED", "UNLOGGED")): 8639 alter_set.set("option", exp.var(self._prev.text.upper())) 8640 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 8641 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 8642 elif self._match_text_seq("LOCATION"): 8643 alter_set.set("location", self._parse_field()) 8644 elif self._match_text_seq("ACCESS", "METHOD"): 8645 alter_set.set("access_method", self._parse_field()) 8646 elif self._match_text_seq("TABLESPACE"): 8647 alter_set.set("tablespace", self._parse_field()) 8648 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 8649 alter_set.set("file_format", [self._parse_field()]) 8650 elif self._match_text_seq("STAGE_FILE_FORMAT"): 8651 alter_set.set("file_format", self._parse_wrapped_options()) 8652 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 8653 alter_set.set("copy_options", self._parse_wrapped_options()) 8654 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 8655 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 8656 else: 8657 if self._match_text_seq("SERDE"): 8658 alter_set.set("serde", self._parse_field()) 8659 8660 properties = self._parse_wrapped(self._parse_properties, optional=True) 8661 alter_set.set("expressions", [properties]) 8662 8663 return alter_set 8664 8665 def _parse_alter_session(self) -> exp.AlterSession: 8666 """Parse ALTER SESSION SET/UNSET statements.""" 8667 if self._match(TokenType.SET): 8668 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 8669 return self.expression(exp.AlterSession(expressions=expressions, unset=False)) 8670 8671 self._match_text_seq("UNSET") 8672 expressions = self._parse_csv( 8673 lambda: self.expression(exp.SetItem(this=self._parse_id_var(any_token=True))) 8674 ) 8675 return self.expression(exp.AlterSession(expressions=expressions, unset=True)) 8676 8677 def _parse_alter(self) -> exp.Alter | exp.Command: 8678 start = self._prev 8679 8680 iceberg = self._match_text_seq("ICEBERG") 8681 8682 alter_token = self._match_set(self.ALTERABLES) and self._prev 8683 if not alter_token: 8684 return self._parse_as_command(start) 8685 if iceberg and alter_token.token_type != TokenType.TABLE: 8686 return self._parse_as_command(start) 8687 8688 exists = self._parse_exists() 8689 only = self._match_text_seq("ONLY") 8690 8691 if alter_token.token_type == TokenType.SESSION: 8692 this = None 8693 check = None 8694 cluster = None 8695 else: 8696 this = self._parse_table(schema=True, parse_partition=self.ALTER_TABLE_PARTITIONS) 8697 check = self._match_text_seq("WITH", "CHECK") 8698 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8699 8700 if self._next: 8701 self._advance() 8702 8703 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 8704 if parser: 8705 actions = ensure_list(parser(self)) 8706 not_valid = self._match_text_seq("NOT", "VALID") 8707 options = self._parse_csv(self._parse_property) 8708 cascade = self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE") 8709 8710 if not self._curr and actions: 8711 return self.expression( 8712 exp.Alter( 8713 this=this, 8714 kind=alter_token.text.upper(), 8715 exists=exists, 8716 actions=actions, 8717 only=only, 8718 options=options, 8719 cluster=cluster, 8720 not_valid=not_valid, 8721 check=check, 8722 cascade=cascade, 8723 iceberg=iceberg, 8724 ) 8725 ) 8726 8727 return self._parse_as_command(start) 8728 8729 def _parse_analyze(self) -> exp.Analyze | exp.Command: 8730 start = self._prev 8731 # https://duckdb.org/docs/sql/statements/analyze 8732 if not self._curr: 8733 return self.expression(exp.Analyze()) 8734 8735 options = [] 8736 while self._match_texts(self.ANALYZE_STYLES): 8737 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 8738 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 8739 else: 8740 options.append(self._prev.text.upper()) 8741 8742 this: exp.Expr | None = None 8743 inner_expression: exp.Expr | None = None 8744 8745 kind = self._curr.text.upper() if self._curr else None 8746 8747 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 8748 this = self._parse_table_parts() 8749 elif self._match_text_seq("TABLES"): 8750 if self._match_set((TokenType.FROM, TokenType.IN)): 8751 kind = f"{kind} {self._prev.text.upper()}" 8752 this = self._parse_table(schema=True, is_db_reference=True) 8753 elif self._match_text_seq("DATABASE"): 8754 this = self._parse_table(schema=True, is_db_reference=True) 8755 elif self._match_text_seq("CLUSTER"): 8756 this = self._parse_table() 8757 # Try matching inner expr keywords before fallback to parse table. 8758 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 8759 kind = None 8760 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 8761 else: 8762 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 8763 kind = None 8764 this = self._parse_table_parts() 8765 8766 partition = self._try_parse(self._parse_partition) 8767 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 8768 return self._parse_as_command(start) 8769 8770 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 8771 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 8772 "WITH", "ASYNC", "MODE" 8773 ): 8774 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 8775 else: 8776 mode = None 8777 8778 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 8779 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 8780 8781 properties = self._parse_properties() 8782 return self.expression( 8783 exp.Analyze( 8784 kind=kind, 8785 this=this, 8786 mode=mode, 8787 partition=partition, 8788 properties=properties, 8789 expression=inner_expression, 8790 options=options, 8791 ) 8792 ) 8793 8794 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 8795 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 8796 this = None 8797 kind = self._prev.text.upper() 8798 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 8799 expressions = [] 8800 8801 if not self._match_text_seq("STATISTICS"): 8802 self.raise_error("Expecting token STATISTICS") 8803 8804 if self._match_text_seq("NOSCAN"): 8805 this = "NOSCAN" 8806 elif self._match(TokenType.FOR): 8807 if self._match_text_seq("ALL", "COLUMNS"): 8808 this = "FOR ALL COLUMNS" 8809 if self._match_texts("COLUMNS"): 8810 this = "FOR COLUMNS" 8811 expressions = self._parse_csv(self._parse_column_reference) 8812 elif self._match_text_seq("SAMPLE"): 8813 sample = self._parse_number() 8814 expressions = [ 8815 self.expression( 8816 exp.AnalyzeSample( 8817 sample=sample, 8818 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 8819 ) 8820 ) 8821 ] 8822 8823 return self.expression( 8824 exp.AnalyzeStatistics(kind=kind, option=option, this=this, expressions=expressions) 8825 ) 8826 8827 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 8828 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 8829 kind = None 8830 this = None 8831 expression: exp.Expr | None = None 8832 if self._match_text_seq("REF", "UPDATE"): 8833 kind = "REF" 8834 this = "UPDATE" 8835 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 8836 this = "UPDATE SET DANGLING TO NULL" 8837 elif self._match_text_seq("STRUCTURE"): 8838 kind = "STRUCTURE" 8839 if self._match_text_seq("CASCADE", "FAST"): 8840 this = "CASCADE FAST" 8841 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 8842 ("ONLINE", "OFFLINE") 8843 ): 8844 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 8845 expression = self._parse_into() 8846 8847 return self.expression(exp.AnalyzeValidate(kind=kind, this=this, expression=expression)) 8848 8849 def _parse_analyze_columns(self) -> exp.AnalyzeColumns | None: 8850 this = self._prev.text.upper() 8851 if self._match_text_seq("COLUMNS"): 8852 return self.expression(exp.AnalyzeColumns(this=f"{this} {self._prev.text.upper()}")) 8853 return None 8854 8855 def _parse_analyze_delete(self) -> exp.AnalyzeDelete | None: 8856 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 8857 if self._match_text_seq("STATISTICS"): 8858 return self.expression(exp.AnalyzeDelete(kind=kind)) 8859 return None 8860 8861 def _parse_analyze_list(self) -> exp.AnalyzeListChainedRows | None: 8862 if self._match_text_seq("CHAINED", "ROWS"): 8863 return self.expression(exp.AnalyzeListChainedRows(expression=self._parse_into())) 8864 return None 8865 8866 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 8867 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 8868 this = self._prev.text.upper() 8869 expression: exp.Expr | None = None 8870 expressions = [] 8871 update_options = None 8872 8873 if self._match_text_seq("HISTOGRAM", "ON"): 8874 expressions = self._parse_csv(self._parse_column_reference) 8875 with_expressions = [] 8876 while self._match(TokenType.WITH): 8877 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 8878 if self._match_texts(("SYNC", "ASYNC")): 8879 if self._match_text_seq("MODE", advance=False): 8880 with_expressions.append(f"{self._prev.text.upper()} MODE") 8881 self._advance() 8882 else: 8883 buckets = self._parse_number() 8884 if self._match_text_seq("BUCKETS"): 8885 with_expressions.append(f"{buckets} BUCKETS") 8886 if with_expressions: 8887 expression = self.expression(exp.AnalyzeWith(expressions=with_expressions)) 8888 8889 if self._match_texts(("MANUAL", "AUTO")) and self._match( 8890 TokenType.UPDATE, advance=False 8891 ): 8892 update_options = self._prev.text.upper() 8893 self._advance() 8894 elif self._match_text_seq("USING", "DATA"): 8895 expression = self.expression(exp.UsingData(this=self._parse_string())) 8896 8897 return self.expression( 8898 exp.AnalyzeHistogram( 8899 this=this, 8900 expressions=expressions, 8901 expression=expression, 8902 update_options=update_options, 8903 ) 8904 ) 8905 8906 def _parse_merge(self) -> exp.Merge: 8907 self._match(TokenType.INTO) 8908 target = self._parse_table() 8909 8910 if target and self._match(TokenType.ALIAS, advance=False): 8911 target.set("alias", self._parse_table_alias()) 8912 8913 self._match(TokenType.USING) 8914 using = self._parse_table() 8915 8916 return self.expression( 8917 exp.Merge( 8918 this=target, 8919 using=using, 8920 on=self._match(TokenType.ON) and self._parse_disjunction(), 8921 using_cond=self._match(TokenType.USING) and self._parse_using_identifiers(), 8922 whens=self._parse_when_matched(), 8923 returning=self._parse_returning(), 8924 ) 8925 ) 8926 8927 def _parse_when_matched(self) -> exp.Whens: 8928 whens = [] 8929 8930 while self._match(TokenType.WHEN): 8931 matched = not self._match(TokenType.NOT) 8932 self._match_text_seq("MATCHED") 8933 source = ( 8934 False 8935 if self._match_text_seq("BY", "TARGET") 8936 else self._match_text_seq("BY", "SOURCE") 8937 ) 8938 condition = self._parse_disjunction() if self._match(TokenType.AND) else None 8939 8940 self._match(TokenType.THEN) 8941 8942 if self._match(TokenType.INSERT): 8943 this = self._parse_star() 8944 if this: 8945 then: exp.Expr | None = self.expression(exp.Insert(this=this)) 8946 else: 8947 then = self.expression( 8948 exp.Insert( 8949 this=exp.var("ROW") 8950 if self._match_text_seq("ROW") 8951 else self._parse_value(values=False), 8952 expression=self._match_text_seq("VALUES") and self._parse_value(), 8953 where=self._parse_where(), 8954 ) 8955 ) 8956 elif self._match(TokenType.UPDATE): 8957 expressions = self._parse_star() 8958 if expressions: 8959 then = self.expression(exp.Update(expressions=expressions)) 8960 else: 8961 then = self.expression( 8962 exp.Update( 8963 expressions=self._match(TokenType.SET) 8964 and self._parse_csv(self._parse_equality), 8965 where=self._parse_where(), 8966 ) 8967 ) 8968 elif self._match(TokenType.DELETE): 8969 then = self.expression(exp.Var(this=self._prev.text)) 8970 else: 8971 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 8972 8973 whens.append( 8974 self.expression( 8975 exp.When(matched=matched, source=source, condition=condition, then=then) 8976 ) 8977 ) 8978 return self.expression(exp.Whens(expressions=whens)) 8979 8980 def _parse_show(self) -> exp.Expr | None: 8981 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 8982 if parser: 8983 return parser(self) 8984 return self._parse_as_command(self._prev) 8985 8986 def _parse_set_item_assignment(self, kind: str | None = None) -> exp.Expr | None: 8987 index = self._index 8988 8989 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 8990 return self._parse_set_transaction(global_=kind == "GLOBAL") 8991 8992 left = self._parse_primary() or self._parse_column() 8993 assignment_delimiter = self._match_texts(self.SET_ASSIGNMENT_DELIMITERS) 8994 8995 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 8996 self._retreat(index) 8997 return None 8998 8999 right = self._parse_statement() or self._parse_id_var() 9000 if isinstance(right, (exp.Column, exp.Identifier)): 9001 right = exp.var(right.name) 9002 9003 this = self.expression(exp.EQ(this=left, expression=right)) 9004 return self.expression(exp.SetItem(this=this, kind=kind)) 9005 9006 def _parse_set_transaction(self, global_: bool = False) -> exp.Expr: 9007 self._match_text_seq("TRANSACTION") 9008 characteristics = self._parse_csv( 9009 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 9010 ) 9011 return self.expression( 9012 exp.SetItem(expressions=characteristics, kind="TRANSACTION", global_=global_) 9013 ) 9014 9015 def _parse_set_item(self) -> exp.Expr | None: 9016 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 9017 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 9018 9019 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 9020 index = self._index 9021 set_ = self.expression( 9022 exp.Set(expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag) 9023 ) 9024 9025 if self._curr: 9026 self._retreat(index) 9027 return self._parse_as_command(self._prev) 9028 9029 return set_ 9030 9031 def _parse_var_from_options( 9032 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 9033 ) -> exp.Var | None: 9034 start = self._curr 9035 if not start: 9036 return None 9037 9038 option = start.text.upper() 9039 continuations = options.get(option) 9040 9041 index = self._index 9042 self._advance() 9043 for keywords in continuations or []: 9044 if isinstance(keywords, str): 9045 keywords = (keywords,) 9046 9047 if self._match_text_seq(*keywords): 9048 option = f"{option} {' '.join(keywords)}" 9049 break 9050 else: 9051 if continuations or continuations is None: 9052 if raise_unmatched: 9053 self.raise_error(f"Unknown option {option}") 9054 9055 self._retreat(index) 9056 return None 9057 9058 return exp.var(option) 9059 9060 def _parse_as_command(self, start: Token) -> exp.Command: 9061 while self._curr: 9062 self._advance() 9063 text = self._find_sql(start, self._prev) 9064 size = len(start.text) 9065 self._warn_unsupported() 9066 return exp.Command(this=text[:size], expression=text[size:]) 9067 9068 def _parse_dict_property(self, this: str) -> exp.DictProperty: 9069 settings = [] 9070 9071 self._match_l_paren() 9072 kind = self._parse_id_var() 9073 9074 if self._match(TokenType.L_PAREN): 9075 while True: 9076 key = self._parse_id_var() 9077 value = self._parse_function() or self._parse_primary_or_var() 9078 if not key and value is None: 9079 break 9080 settings.append(self.expression(exp.DictSubProperty(this=key, value=value))) 9081 self._match(TokenType.R_PAREN) 9082 9083 self._match_r_paren() 9084 9085 return self.expression( 9086 exp.DictProperty(this=this, kind=kind.this if kind else None, settings=settings) 9087 ) 9088 9089 def _parse_dict_range(self, this: str) -> exp.DictRange: 9090 self._match_l_paren() 9091 has_min = self._match_text_seq("MIN") 9092 if has_min: 9093 min = self._parse_var() or self._parse_primary() 9094 self._match_text_seq("MAX") 9095 max = self._parse_var() or self._parse_primary() 9096 else: 9097 max = self._parse_var() or self._parse_primary() 9098 min = exp.Literal.number(0) 9099 self._match_r_paren() 9100 return self.expression(exp.DictRange(this=this, min=min, max=max)) 9101 9102 def _parse_comprehension(self, this: exp.Expr | None) -> exp.Comprehension | None: 9103 index = self._index 9104 expression = self._parse_column() 9105 position = self._match(TokenType.COMMA) and self._parse_column() 9106 9107 if not self._match(TokenType.IN): 9108 self._retreat(index - 1) 9109 return None 9110 iterator = self._parse_column() 9111 condition = self._parse_disjunction() if self._match_text_seq("IF") else None 9112 return self.expression( 9113 exp.Comprehension( 9114 this=this, 9115 expression=expression, 9116 position=position, 9117 iterator=iterator, 9118 condition=condition, 9119 ) 9120 ) 9121 9122 def _parse_heredoc(self) -> exp.Heredoc | None: 9123 if self._match(TokenType.HEREDOC_STRING): 9124 return self.expression(exp.Heredoc(this=self._prev.text)) 9125 9126 if not self._match_text_seq("$"): 9127 return None 9128 9129 tags = ["$"] 9130 tag_text = None 9131 9132 if self._is_connected(): 9133 self._advance() 9134 tags.append(self._prev.text.upper()) 9135 else: 9136 self.raise_error("No closing $ found") 9137 9138 if tags[-1] != "$": 9139 if self._is_connected() and self._match_text_seq("$"): 9140 tag_text = tags[-1] 9141 tags.append("$") 9142 else: 9143 self.raise_error("No closing $ found") 9144 9145 heredoc_start = self._curr 9146 9147 while self._curr: 9148 if self._match_text_seq(*tags, advance=False): 9149 this = self._find_sql(heredoc_start, self._prev) 9150 self._advance(len(tags)) 9151 return self.expression(exp.Heredoc(this=this, tag=tag_text)) 9152 9153 self._advance() 9154 9155 self.raise_error(f"No closing {''.join(tags)} found") 9156 return None 9157 9158 def _find_parser(self, parsers: dict[str, t.Callable], trie: dict) -> t.Callable | None: 9159 if not self._curr: 9160 return None 9161 9162 index = self._index 9163 this = [] 9164 while True: 9165 # The current token might be multiple words 9166 curr = self._curr.text.upper() 9167 key = curr.split(" ") 9168 this.append(curr) 9169 9170 self._advance() 9171 result, trie = in_trie(trie, key) 9172 if result == TrieResult.FAILED: 9173 break 9174 9175 if result == TrieResult.EXISTS: 9176 subparser = parsers[" ".join(this)] 9177 return subparser 9178 9179 self._retreat(index) 9180 return None 9181 9182 def _match_l_paren(self, expression: exp.Expr | None = None) -> None: 9183 if not self._match(TokenType.L_PAREN, expression=expression): 9184 self.raise_error("Expecting (") 9185 9186 def _match_r_paren(self, expression: exp.Expr | None = None) -> None: 9187 if not self._match(TokenType.R_PAREN, expression=expression): 9188 self.raise_error("Expecting )") 9189 9190 def _replace_lambda( 9191 self, node: exp.Expr | None, expressions: list[exp.Expr] 9192 ) -> exp.Expr | None: 9193 if not node: 9194 return node 9195 9196 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 9197 9198 for column in node.find_all(exp.Column): 9199 typ = lambda_types.get(column.parts[0].name) 9200 if typ is not None: 9201 dot_or_id = column.to_dot() if column.table else column.this 9202 9203 if typ: 9204 dot_or_id = self.expression(exp.Cast(this=dot_or_id, to=typ)) 9205 9206 parent = column.parent 9207 9208 while isinstance(parent, exp.Dot): 9209 if not isinstance(parent.parent, exp.Dot): 9210 parent.replace(dot_or_id) 9211 break 9212 parent = parent.parent 9213 else: 9214 if column is node: 9215 node = dot_or_id 9216 else: 9217 column.replace(dot_or_id) 9218 return node 9219 9220 def _parse_truncate_table(self) -> exp.TruncateTable | None | exp.Expr: 9221 start = self._prev 9222 9223 # Not to be confused with TRUNCATE(number, decimals) function call 9224 if self._match(TokenType.L_PAREN): 9225 self._retreat(self._index - 2) 9226 return self._parse_function() 9227 9228 # Clickhouse supports TRUNCATE DATABASE as well 9229 is_database = self._match(TokenType.DATABASE) 9230 9231 self._match(TokenType.TABLE) 9232 9233 exists = self._parse_exists(not_=False) 9234 9235 expressions = self._parse_csv( 9236 lambda: self._parse_table(schema=True, is_db_reference=is_database) 9237 ) 9238 9239 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 9240 9241 if self._match_text_seq("RESTART", "IDENTITY"): 9242 identity = "RESTART" 9243 elif self._match_text_seq("CONTINUE", "IDENTITY"): 9244 identity = "CONTINUE" 9245 else: 9246 identity = None 9247 9248 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 9249 option = self._prev.text 9250 else: 9251 option = None 9252 9253 partition = self._parse_partition() 9254 9255 # Fallback case 9256 if self._curr: 9257 return self._parse_as_command(start) 9258 9259 return self.expression( 9260 exp.TruncateTable( 9261 expressions=expressions, 9262 is_database=is_database, 9263 exists=exists, 9264 cluster=cluster, 9265 identity=identity, 9266 option=option, 9267 partition=partition, 9268 ) 9269 ) 9270 9271 def _parse_with_operator(self) -> exp.Expr | None: 9272 this = self._parse_ordered(self._parse_opclass) 9273 9274 if not self._match(TokenType.WITH): 9275 return this 9276 9277 op = self._parse_var(any_token=True, tokens=self.RESERVED_TOKENS) 9278 9279 return self.expression(exp.WithOperator(this=this, op=op)) 9280 9281 def _parse_wrapped_options(self) -> list[exp.Expr]: 9282 self._match(TokenType.EQ) 9283 self._match(TokenType.L_PAREN) 9284 9285 opts: list[exp.Expr] = [] 9286 option: exp.Expr | list[exp.Expr] | None 9287 while self._curr and not self._match(TokenType.R_PAREN): 9288 if self._match_text_seq("FORMAT_NAME", "="): 9289 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 9290 option = self._parse_format_name() 9291 else: 9292 option = self._parse_property() 9293 9294 if option is None: 9295 self.raise_error("Unable to parse option") 9296 break 9297 9298 opts.extend(ensure_list(option)) 9299 9300 return opts 9301 9302 def _parse_copy_parameters(self) -> list[exp.CopyParameter]: 9303 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 9304 9305 options = [] 9306 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 9307 option = self._parse_var(any_token=True) 9308 prev = self._prev.text.upper() 9309 9310 # Different dialects might separate options and values by white space, "=" and "AS" 9311 self._match(TokenType.EQ) 9312 self._match(TokenType.ALIAS) 9313 9314 param = self.expression(exp.CopyParameter(this=option)) 9315 9316 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 9317 TokenType.L_PAREN, advance=False 9318 ): 9319 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 9320 param.set("expressions", self._parse_wrapped_options()) 9321 elif prev == "FILE_FORMAT": 9322 # T-SQL's external file format case 9323 param.set("expression", self._parse_field()) 9324 elif ( 9325 prev == "FORMAT" 9326 and self._prev.token_type == TokenType.ALIAS 9327 and self._match_texts(("AVRO", "JSON")) 9328 ): 9329 param.set("this", exp.var(f"FORMAT AS {self._prev.text.upper()}")) 9330 param.set("expression", self._parse_field()) 9331 else: 9332 param.set("expression", self._parse_unquoted_field() or self._parse_bracket()) 9333 9334 options.append(param) 9335 9336 if sep: 9337 self._match(sep) 9338 9339 return options 9340 9341 def _parse_credentials(self) -> exp.Credentials | None: 9342 expr = self.expression(exp.Credentials()) 9343 9344 if self._match_text_seq("STORAGE_INTEGRATION", "="): 9345 expr.set("storage", self._parse_field()) 9346 if self._match_text_seq("CREDENTIALS"): 9347 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 9348 creds = ( 9349 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 9350 ) 9351 expr.set("credentials", creds) 9352 if self._match_text_seq("ENCRYPTION"): 9353 expr.set("encryption", self._parse_wrapped_options()) 9354 if self._match_text_seq("IAM_ROLE"): 9355 expr.set( 9356 "iam_role", 9357 exp.var(self._prev.text) if self._match(TokenType.DEFAULT) else self._parse_field(), 9358 ) 9359 if self._match_text_seq("REGION"): 9360 expr.set("region", self._parse_field()) 9361 9362 return expr 9363 9364 def _parse_file_location(self) -> exp.Expr | None: 9365 return self._parse_field() 9366 9367 def _parse_copy(self) -> exp.Copy | exp.Command: 9368 start = self._prev 9369 9370 self._match(TokenType.INTO) 9371 9372 this = ( 9373 self._parse_select(nested=True, parse_subquery_alias=False) 9374 if self._match(TokenType.L_PAREN, advance=False) 9375 else self._parse_table(schema=True) 9376 ) 9377 9378 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 9379 9380 files = self._parse_csv(self._parse_file_location) 9381 if self._match(TokenType.EQ, advance=False): 9382 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 9383 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 9384 # list via `_parse_wrapped(..)` below. 9385 self._advance(-1) 9386 files = [] 9387 9388 credentials = self._parse_credentials() 9389 9390 self._match_text_seq("WITH") 9391 9392 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 9393 9394 # Fallback case 9395 if self._curr: 9396 return self._parse_as_command(start) 9397 9398 return self.expression( 9399 exp.Copy(this=this, kind=kind, credentials=credentials, files=files, params=params) 9400 ) 9401 9402 def _parse_normalize(self) -> exp.Normalize: 9403 return self.expression( 9404 exp.Normalize( 9405 this=self._parse_bitwise(), form=self._match(TokenType.COMMA) and self._parse_var() 9406 ) 9407 ) 9408 9409 def _parse_ceil_floor(self, expr_type: type[TCeilFloor]) -> TCeilFloor: 9410 args = self._parse_csv(lambda: self._parse_lambda()) 9411 9412 this = seq_get(args, 0) 9413 decimals = seq_get(args, 1) 9414 9415 return expr_type( 9416 this=this, 9417 decimals=decimals, 9418 to=self._parse_var() if self._match_text_seq("TO") else None, 9419 ) 9420 9421 def _parse_star_ops(self) -> exp.Expr | None: 9422 star_token = self._prev 9423 9424 if self._match_text_seq("COLUMNS", "(", advance=False): 9425 this = self._parse_function() 9426 if isinstance(this, exp.Columns): 9427 this.set("unpack", True) 9428 return this 9429 9430 return self.expression( 9431 exp.Star( 9432 except_=self._parse_star_op("EXCEPT", "EXCLUDE"), 9433 replace=self._parse_star_op("REPLACE"), 9434 rename=self._parse_star_op("RENAME"), 9435 ) 9436 ).update_positions(star_token) 9437 9438 def _parse_grant_privilege(self) -> exp.GrantPrivilege | None: 9439 privilege_parts = [] 9440 9441 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 9442 # (end of privilege list) or L_PAREN (start of column list) are met 9443 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 9444 privilege_parts.append(self._curr.text.upper()) 9445 self._advance() 9446 9447 this = exp.var(" ".join(privilege_parts)) 9448 expressions = ( 9449 self._parse_wrapped_csv(self._parse_column) 9450 if self._match(TokenType.L_PAREN, advance=False) 9451 else None 9452 ) 9453 9454 return self.expression(exp.GrantPrivilege(this=this, expressions=expressions)) 9455 9456 def _parse_grant_principal(self) -> exp.GrantPrincipal | None: 9457 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 9458 principal = self._parse_id_var() 9459 9460 if not principal: 9461 return None 9462 9463 return self.expression(exp.GrantPrincipal(this=principal, kind=kind)) 9464 9465 def _parse_grant_revoke_common( 9466 self, 9467 ) -> tuple[list | None, str | None, exp.Expr | None]: 9468 privileges = self._parse_csv(self._parse_grant_privilege) 9469 9470 self._match(TokenType.ON) 9471 kind = self._prev.text.upper() if self._match_set(self.CREATABLES) else None 9472 9473 # Attempt to parse the securable e.g. MySQL allows names 9474 # such as "foo.*", "*.*" which are not easily parseable yet 9475 securable = self._try_parse(self._parse_table_parts) 9476 9477 return privileges, kind, securable 9478 9479 def _parse_grant(self) -> exp.Grant | exp.Command: 9480 start = self._prev 9481 9482 privileges, kind, securable = self._parse_grant_revoke_common() 9483 9484 if not securable or not self._match_text_seq("TO"): 9485 return self._parse_as_command(start) 9486 9487 principals = self._parse_csv(self._parse_grant_principal) 9488 9489 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 9490 9491 if self._curr: 9492 return self._parse_as_command(start) 9493 9494 return self.expression( 9495 exp.Grant( 9496 privileges=privileges, 9497 kind=kind, 9498 securable=securable, 9499 principals=principals, 9500 grant_option=grant_option, 9501 ) 9502 ) 9503 9504 def _parse_revoke(self) -> exp.Revoke | exp.Command: 9505 start = self._prev 9506 9507 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 9508 9509 privileges, kind, securable = self._parse_grant_revoke_common() 9510 9511 if not securable or not self._match_text_seq("FROM"): 9512 return self._parse_as_command(start) 9513 9514 principals = self._parse_csv(self._parse_grant_principal) 9515 9516 cascade = None 9517 if self._match_texts(("CASCADE", "RESTRICT")): 9518 cascade = self._prev.text.upper() 9519 9520 if self._curr: 9521 return self._parse_as_command(start) 9522 9523 return self.expression( 9524 exp.Revoke( 9525 privileges=privileges, 9526 kind=kind, 9527 securable=securable, 9528 principals=principals, 9529 grant_option=grant_option, 9530 cascade=cascade, 9531 ) 9532 ) 9533 9534 def _parse_overlay(self) -> exp.Overlay: 9535 def _parse_overlay_arg(text: str) -> exp.Expr | None: 9536 return ( 9537 self._parse_bitwise() 9538 if self._match(TokenType.COMMA) or self._match_text_seq(text) 9539 else None 9540 ) 9541 9542 return self.expression( 9543 exp.Overlay( 9544 this=self._parse_bitwise(), 9545 expression=_parse_overlay_arg("PLACING"), 9546 from_=_parse_overlay_arg("FROM"), 9547 for_=_parse_overlay_arg("FOR"), 9548 ) 9549 ) 9550 9551 def _parse_format_name(self) -> exp.Property: 9552 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 9553 # for FILE_FORMAT = <format_name> 9554 return self.expression( 9555 exp.Property( 9556 this=exp.var("FORMAT_NAME"), value=self._parse_string() or self._parse_table_parts() 9557 ) 9558 ) 9559 9560 def _parse_max_min_by(self, expr_type: type[exp.AggFunc]) -> exp.AggFunc: 9561 args: list[exp.Expr] = [] 9562 9563 if self._match(TokenType.DISTINCT): 9564 args.append(self.expression(exp.Distinct(expressions=[self._parse_lambda()]))) 9565 self._match(TokenType.COMMA) 9566 9567 args.extend(self._parse_function_args()) 9568 9569 return self.expression( 9570 expr_type(this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)) 9571 ) 9572 9573 def _identifier_expression( 9574 self, token: Token | None = None, quoted: bool | None = None 9575 ) -> exp.Identifier: 9576 token = token or self._prev 9577 return self.expression(exp.Identifier(this=token.text, quoted=quoted), token) 9578 9579 def _build_pipe_cte( 9580 self, 9581 query: exp.Query, 9582 expressions: list[exp.Expr], 9583 alias_cte: exp.TableAlias | None = None, 9584 ) -> exp.Select: 9585 new_cte: str | exp.TableAlias | None 9586 if alias_cte: 9587 new_cte = alias_cte 9588 else: 9589 self._pipe_cte_counter += 1 9590 new_cte = f"__tmp{self._pipe_cte_counter}" 9591 9592 with_ = query.args.get("with_") 9593 ctes = with_.pop() if with_ else None 9594 9595 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 9596 if ctes: 9597 new_select.set("with_", ctes) 9598 9599 return new_select.with_(new_cte, as_=query, copy=False) 9600 9601 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 9602 select = self._parse_select(consume_pipe=False) 9603 if not select: 9604 return query 9605 9606 return self._build_pipe_cte( 9607 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 9608 ) 9609 9610 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 9611 limit = self._parse_limit() 9612 offset = self._parse_offset() 9613 if limit: 9614 curr_limit = query.args.get("limit", limit) 9615 if curr_limit.expression.to_py() >= limit.expression.to_py(): 9616 query.limit(limit, copy=False) 9617 if offset: 9618 curr_offset = query.args.get("offset") 9619 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 9620 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 9621 9622 return query 9623 9624 def _parse_pipe_syntax_aggregate_fields(self) -> exp.Expr | None: 9625 this = self._parse_disjunction() 9626 if self._match_text_seq("GROUP", "AND", advance=False): 9627 return this 9628 9629 this = self._parse_alias(this) 9630 9631 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 9632 return self._parse_ordered(lambda: this) 9633 9634 return this 9635 9636 def _parse_pipe_syntax_aggregate_group_order_by( 9637 self, query: exp.Select, group_by_exists: bool = True 9638 ) -> exp.Select: 9639 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 9640 aggregates_or_groups, orders = [], [] 9641 for element in expr: 9642 if isinstance(element, exp.Ordered): 9643 this = element.this 9644 if isinstance(this, exp.Alias): 9645 element.set("this", this.args["alias"]) 9646 orders.append(element) 9647 else: 9648 this = element 9649 aggregates_or_groups.append(this) 9650 9651 if group_by_exists: 9652 query.select(*aggregates_or_groups, copy=False).group_by( 9653 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 9654 copy=False, 9655 ) 9656 else: 9657 query.select(*aggregates_or_groups, append=False, copy=False) 9658 9659 if orders: 9660 return query.order_by(*orders, append=False, copy=False) 9661 9662 return query 9663 9664 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 9665 self._match_text_seq("AGGREGATE") 9666 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 9667 9668 if self._match(TokenType.GROUP_BY) or ( 9669 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 9670 ): 9671 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 9672 9673 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9674 9675 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> exp.Query | None: 9676 first_setop = self.parse_set_operation(this=query) 9677 if not first_setop: 9678 return None 9679 9680 def _parse_and_unwrap_query() -> exp.Expr | None: 9681 expr = self._parse_paren() 9682 return expr.assert_is(exp.Subquery).unnest() if expr else None 9683 9684 first_setop.this.pop() 9685 9686 setops = [ 9687 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 9688 *self._parse_csv(_parse_and_unwrap_query), 9689 ] 9690 9691 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9692 with_ = query.args.get("with_") 9693 ctes = with_.pop() if with_ else None 9694 9695 if isinstance(first_setop, exp.Union): 9696 query = query.union(*setops, copy=False, **first_setop.args) 9697 elif isinstance(first_setop, exp.Except): 9698 query = query.except_(*setops, copy=False, **first_setop.args) 9699 else: 9700 query = query.intersect(*setops, copy=False, **first_setop.args) 9701 9702 query.set("with_", ctes) 9703 9704 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9705 9706 def _parse_pipe_syntax_join(self, query: exp.Query) -> exp.Query | None: 9707 join = self._parse_join() 9708 if not join: 9709 return None 9710 9711 if isinstance(query, exp.Select): 9712 return query.join(join, copy=False) 9713 9714 return query 9715 9716 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 9717 pivots = self._parse_pivots() 9718 if not pivots: 9719 return query 9720 9721 from_ = query.args.get("from_") 9722 if from_: 9723 from_.this.set("pivots", pivots) 9724 9725 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9726 9727 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 9728 self._match_text_seq("EXTEND") 9729 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 9730 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9731 9732 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 9733 sample = self._parse_table_sample() 9734 9735 with_ = query.args.get("with_") 9736 if with_: 9737 with_.expressions[-1].this.set("sample", sample) 9738 else: 9739 query.set("sample", sample) 9740 9741 return query 9742 9743 def _parse_pipe_syntax_query(self, query: exp.Query) -> exp.Query | None: 9744 if isinstance(query, exp.Subquery): 9745 query = exp.select("*").from_(query, copy=False) 9746 9747 if not query.args.get("from_"): 9748 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 9749 9750 while self._match(TokenType.PIPE_GT): 9751 start_index = self._index 9752 start_text = self._curr.text.upper() 9753 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(start_text) 9754 if not parser: 9755 # The set operators (UNION, etc) and the JOIN operator have a few common starting 9756 # keywords, making it tricky to disambiguate them without lookahead. The approach 9757 # here is to try and parse a set operation and if that fails, then try to parse a 9758 # join operator. If that fails as well, then the operator is not supported. 9759 parsed_query = self._parse_pipe_syntax_set_operator(query) 9760 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 9761 if not parsed_query: 9762 self._retreat(start_index) 9763 self.raise_error(f"Unsupported pipe syntax operator: '{start_text}'.") 9764 break 9765 query = parsed_query 9766 else: 9767 query = parser(self, query) 9768 9769 return query 9770 9771 def _parse_declareitem(self) -> exp.DeclareItem | None: 9772 self._match_texts(("VAR", "VARIABLE")) 9773 9774 vars = self._parse_csv(self._parse_id_var) 9775 if not vars: 9776 return None 9777 9778 self._match(TokenType.ALIAS) 9779 kind = self._parse_schema() if self._match(TokenType.TABLE) else self._parse_types() 9780 default = ( 9781 self._match(TokenType.DEFAULT) or self._match(TokenType.EQ) 9782 ) and self._parse_bitwise() 9783 9784 return self.expression(exp.DeclareItem(this=vars, kind=kind, default=default)) 9785 9786 def _parse_declare(self) -> exp.Declare | exp.Command: 9787 start = self._prev 9788 replace = self._match_text_seq("OR", "REPLACE") 9789 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 9790 9791 if not expressions or self._curr: 9792 return self._parse_as_command(start) 9793 9794 return self.expression(exp.Declare(expressions=expressions, replace=replace)) 9795 9796 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 9797 exp_class = exp.Cast if strict else exp.TryCast 9798 9799 if exp_class == exp.TryCast: 9800 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 9801 9802 return self.expression(exp_class(**kwargs)) 9803 9804 def _parse_json_value(self) -> exp.JSONValue: 9805 this = self._parse_bitwise() 9806 self._match(TokenType.COMMA) 9807 path = self._parse_bitwise() 9808 9809 returning = self._match(TokenType.RETURNING) and self._parse_type() 9810 9811 return self.expression( 9812 exp.JSONValue( 9813 this=this, 9814 path=self.dialect.to_json_path(path), 9815 returning=returning, 9816 on_condition=self._parse_on_condition(), 9817 ) 9818 ) 9819 9820 def _parse_group_concat(self) -> exp.Expr | None: 9821 def concat_exprs(node: exp.Expr | None, exprs: list[exp.Expr]) -> exp.Expr: 9822 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 9823 concat_exprs = [ 9824 self.expression( 9825 exp.Concat( 9826 expressions=node.expressions, 9827 safe=True, 9828 coalesce=self.dialect.CONCAT_COALESCE, 9829 ) 9830 ) 9831 ] 9832 node.set("expressions", concat_exprs) 9833 return node 9834 if len(exprs) == 1: 9835 return exprs[0] 9836 return self.expression( 9837 exp.Concat(expressions=args, safe=True, coalesce=self.dialect.CONCAT_COALESCE) 9838 ) 9839 9840 args = self._parse_csv(self._parse_lambda) 9841 9842 if args: 9843 order = args[-1] if isinstance(args[-1], exp.Order) else None 9844 9845 if order: 9846 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 9847 # remove 'expr' from exp.Order and add it back to args 9848 args[-1] = order.this 9849 order.set("this", concat_exprs(order.this, args)) 9850 9851 this = order or concat_exprs(args[0], args) 9852 else: 9853 this = None 9854 9855 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 9856 9857 return self.expression(exp.GroupConcat(this=this, separator=separator)) 9858 9859 def _parse_initcap(self) -> exp.Initcap: 9860 expr = exp.Initcap.from_arg_list(self._parse_function_args()) 9861 9862 # attach dialect's default delimiters 9863 if expr.args.get("expression") is None: 9864 expr.set("expression", exp.Literal.string(self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS)) 9865 9866 return expr 9867 9868 def _parse_operator(self, this: exp.Expr | None) -> exp.Expr | None: 9869 while True: 9870 if not self._match(TokenType.L_PAREN): 9871 break 9872 9873 op = "" 9874 while self._curr and not self._match(TokenType.R_PAREN): 9875 op += self._curr.text 9876 self._advance() 9877 9878 comments = self._prev_comments 9879 this = self.expression( 9880 exp.Operator(this=this, operator=op, expression=self._parse_bitwise()), 9881 comments=comments, 9882 ) 9883 9884 if not self._match(TokenType.OPERATOR): 9885 break 9886 9887 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- max_nodes: Maximum number of AST nodes to prevent memory exhaustion. Set to -1 (default) to disable the check.
1828 def __init__( 1829 self, 1830 error_level: ErrorLevel | None = None, 1831 error_message_context: int = 100, 1832 max_errors: int = 3, 1833 max_nodes: int = -1, 1834 dialect: DialectType = None, 1835 ): 1836 self.error_level: ErrorLevel = error_level or ErrorLevel.IMMEDIATE 1837 self.error_message_context: int = error_message_context 1838 self.max_errors: int = max_errors 1839 self.max_nodes: int = max_nodes 1840 self.dialect: t.Any = _resolve_dialect(dialect) 1841 self.sql: str = "" 1842 self.errors: list[ParseError] = [] 1843 self._tokens: list[Token] = [] 1844 self._tokens_size: i64 = 0 1845 self._index: i64 = 0 1846 self._curr: Token = SENTINEL_NONE 1847 self._next: Token = SENTINEL_NONE 1848 self._prev: Token = SENTINEL_NONE 1849 self._prev_comments: list[str] = [] 1850 self._pipe_cte_counter: int = 0 1851 self._chunks: list[list[Token]] = [] 1852 self._chunk_index: i64 = 0 1853 self._node_count: int = 0
1855 def reset(self) -> None: 1856 self.sql = "" 1857 self.errors = [] 1858 self._tokens = [] 1859 self._tokens_size = 0 1860 self._index = 0 1861 self._curr = SENTINEL_NONE 1862 self._next = SENTINEL_NONE 1863 self._prev = SENTINEL_NONE 1864 self._prev_comments = [] 1865 self._pipe_cte_counter = 0 1866 self._chunks = [] 1867 self._chunk_index = 0 1868 self._node_count = 0
1958 def raise_error(self, message: str, token: Token = SENTINEL_NONE) -> None: 1959 token = token or self._curr or self._prev or Token.string("") 1960 formatted_sql, start_context, highlight, end_context = highlight_sql( 1961 sql=self.sql, 1962 positions=[(token.start, token.end)], 1963 context_length=self.error_message_context, 1964 ) 1965 formatted_message = f"{message}. Line {token.line}, Col: {token.col}.\n {formatted_sql}" 1966 1967 error = ParseError.new( 1968 formatted_message, 1969 description=message, 1970 line=token.line, 1971 col=token.col, 1972 start_context=start_context, 1973 highlight=highlight, 1974 end_context=end_context, 1975 ) 1976 1977 if self.error_level == ErrorLevel.IMMEDIATE: 1978 raise error 1979 1980 self.errors.append(error)
1982 def validate_expression(self, expression: E, args: list | None = None) -> E: 1983 if self.max_nodes > -1: 1984 self._node_count += 1 1985 if self._node_count > self.max_nodes: 1986 self.raise_error(f"Maximum number of AST nodes ({self.max_nodes}) exceeded") 1987 if self.error_level != ErrorLevel.IGNORE: 1988 for error_message in expression.error_messages(args): 1989 self.raise_error(error_message) 1990 return expression
2009 def parse(self, raw_tokens: list[Token], sql: str) -> list[exp.Expr | None]: 2010 """ 2011 Parses a list of tokens and returns a list of syntax trees, one tree 2012 per parsed SQL statement. 2013 2014 Args: 2015 raw_tokens: The list of tokens. 2016 sql: The original SQL string. 2017 2018 Returns: 2019 The list of the produced syntax trees. 2020 """ 2021 return self._parse( 2022 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 2023 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string.
Returns:
The list of the produced syntax trees.
2025 def parse_into( 2026 self, 2027 expression_types: exp.IntoType, 2028 raw_tokens: list[Token], 2029 sql: str | None = None, 2030 ) -> list[exp.Expr | None]: 2031 """ 2032 Parses a list of tokens into a given Expr type. If a collection of Expr 2033 types is given instead, this method will try to parse the token list into each one 2034 of them, stopping at the first for which the parsing succeeds. 2035 2036 Args: 2037 expression_types: The expression type(s) to try and parse the token list into. 2038 raw_tokens: The list of tokens. 2039 sql: The original SQL string, used to produce helpful debug messages. 2040 2041 Returns: 2042 The target Expr. 2043 """ 2044 errors = [] 2045 for expression_type in ensure_list(expression_types): 2046 parser = self.EXPRESSION_PARSERS.get(t.cast(type[exp.Expr], expression_type)) 2047 if not parser: 2048 raise TypeError(f"No parser registered for {expression_type}") 2049 2050 try: 2051 return self._parse(parser, raw_tokens, sql) 2052 except ParseError as e: 2053 e.errors[0]["into_expression"] = expression_type 2054 errors.append(e) 2055 2056 raise ParseError( 2057 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 2058 errors=merge_errors(errors), 2059 ) from errors[-1]
Parses a list of tokens into a given Expr type. If a collection of Expr types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expr.
2061 def check_errors(self) -> None: 2062 """Logs or raises any found errors, depending on the chosen error level setting.""" 2063 if self.error_level == ErrorLevel.WARN: 2064 for error in self.errors: 2065 logger.error(str(error)) 2066 elif self.error_level == ErrorLevel.RAISE and self.errors: 2067 raise ParseError( 2068 concat_messages(self.errors, self.max_errors), 2069 errors=merge_errors(self.errors), 2070 )
Logs or raises any found errors, depending on the chosen error level setting.
2072 def expression( 2073 self, 2074 instance: E, 2075 token: Token | None = None, 2076 comments: list[str] | None = None, 2077 ) -> E: 2078 if token: 2079 instance.update_positions(token) 2080 instance.add_comments(comments) if comments else self._add_comments(instance) 2081 if not instance.is_primitive: 2082 instance = self.validate_expression(instance) 2083 return instance
5548 def parse_set_operation( 5549 self, this: exp.Expr | None, consume_pipe: bool = False 5550 ) -> exp.Expr | None: 5551 start = self._index 5552 _, side_token, kind_token = self._parse_join_parts() 5553 5554 side = side_token.text if side_token else None 5555 kind = kind_token.text if kind_token else None 5556 5557 if not self._match_set(self.SET_OPERATIONS): 5558 self._retreat(start) 5559 return None 5560 5561 token_type = self._prev.token_type 5562 5563 if token_type == TokenType.UNION: 5564 operation: type[exp.SetOperation] = exp.Union 5565 elif token_type == TokenType.EXCEPT: 5566 operation = exp.Except 5567 else: 5568 operation = exp.Intersect 5569 5570 comments = self._prev.comments 5571 5572 if self._match(TokenType.DISTINCT): 5573 distinct: bool | None = True 5574 elif self._match(TokenType.ALL): 5575 distinct = False 5576 else: 5577 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 5578 if distinct is None: 5579 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 5580 5581 by_name = ( 5582 self._match_text_seq("BY", "NAME") 5583 or self._match_text_seq("STRICT", "CORRESPONDING") 5584 or None 5585 ) 5586 if self._match_text_seq("CORRESPONDING"): 5587 by_name = True 5588 if not side and not kind: 5589 kind = "INNER" 5590 5591 on_column_list = None 5592 if by_name and self._match_texts(("ON", "BY")): 5593 on_column_list = self._parse_wrapped_csv(self._parse_column) 5594 5595 expression = self._parse_select( 5596 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 5597 ) 5598 5599 return self.expression( 5600 operation( 5601 this=this, 5602 distinct=distinct, 5603 by_name=by_name, 5604 expression=expression, 5605 side=side, 5606 kind=kind, 5607 on=on_column_list, 5608 ), 5609 comments=comments, 5610 )