# sqlglot.parser — tokenizer-output-to-AST parser module.
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18 T = t.TypeVar("T") 19 20logger = logging.getLogger("sqlglot") 21 22OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 23 24 25def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 26 if len(args) == 1 and args[0].is_star: 27 return exp.StarMap(this=args[0]) 28 29 keys = [] 30 values = [] 31 for i in range(0, len(args), 2): 32 keys.append(args[i]) 33 values.append(args[i + 1]) 34 35 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 36 37 38def build_like(args: t.List) -> exp.Escape | exp.Like: 39 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 40 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 41 42 43def binary_range_parser( 44 expr_type: t.Type[exp.Expression], reverse_args: bool = False 45) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 46 def _parse_binary_range( 47 self: Parser, this: t.Optional[exp.Expression] 48 ) -> t.Optional[exp.Expression]: 49 expression = self._parse_bitwise() 50 if reverse_args: 51 this, expression = expression, this 52 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 53 54 return _parse_binary_range 55 56 57def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 58 # Default argument order is base, expression 59 this = seq_get(args, 0) 60 expression = seq_get(args, 1) 61 
62 if expression: 63 if not dialect.LOG_BASE_FIRST: 64 this, expression = expression, this 65 return exp.Log(this=this, expression=expression) 66 67 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 68 69 70def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 71 arg = seq_get(args, 0) 72 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 73 74 75def build_lower(args: t.List) -> exp.Lower | exp.Hex: 76 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 77 arg = seq_get(args, 0) 78 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 79 80 81def build_upper(args: t.List) -> exp.Upper | exp.Hex: 82 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 83 arg = seq_get(args, 0) 84 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 85 86 87def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 88 def _builder(args: t.List, dialect: Dialect) -> E: 89 expression = expr_type( 90 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 91 ) 92 if len(args) > 2 and expr_type is exp.JSONExtract: 93 expression.set("expressions", args[2:]) 94 95 return expression 96 97 return _builder 98 99 100def build_mod(args: t.List) -> exp.Mod: 101 this = seq_get(args, 0) 102 expression = seq_get(args, 1) 103 104 # Wrap the operands if they are binary nodes, e.g. 
MOD(a + 1, 7) -> (a + 1) % 7 105 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 106 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 107 108 return exp.Mod(this=this, expression=expression) 109 110 111class _Parser(type): 112 def __new__(cls, clsname, bases, attrs): 113 klass = super().__new__(cls, clsname, bases, attrs) 114 115 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 116 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 117 118 return klass 119 120 121class Parser(metaclass=_Parser): 122 """ 123 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 124 125 Args: 126 error_level: The desired error level. 127 Default: ErrorLevel.IMMEDIATE 128 error_message_context: The amount of context to capture from a query string when displaying 129 the error message (in number of characters). 130 Default: 100 131 max_errors: Maximum number of error messages to include in a raised ParseError. 132 This is only relevant if error_level is ErrorLevel.RAISE. 
            Default: 3
    """

    # Maps upper-case function names to builders that turn the parsed argument
    # list (and, for two-arg callables, the dialect) into an expression node.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # GLOB's arguments are reversed relative to the node's this/expression.
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "MOD": build_mod,
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    # Functions that may appear without parentheses (e.g. CURRENT_DATE).
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    # Type tokens that carry named inner fields.
    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    # Type tokens that can wrap/contain other types.
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    # Every token that can introduce a data type.
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    # Signed integer/decimal type token -> its unsigned counterpart.
    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    # Predicates that can prefix a subquery (ANY/ALL/EXISTS/SOME).
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    # Object kinds creatable at the database level.
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    # Everything that can follow CREATE.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # END terminates an interval expression, so it can't name one.
    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Identifier tokens minus those that would be ambiguous after a table reference.
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    # Tokens that may be followed by a parenthesized argument list (function call).
    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Operator-precedence tables: token -> expression node class.
    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }
    # Comparison, bitwise and arithmetic precedence tables (token -> node class).
    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    # Empty by default; dialects with an exponentiation operator populate this.
    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    # Lambda-arrow handlers: -> builds a Lambda, => builds a keyword argument.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    # Postfix operators applied to a column expression (casts, JSON extraction, ...).
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Entry points for parsing a specific expression kind on demand.
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Statement-leading token -> handler for that statement kind.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }
    # Prefix-operator token -> handler producing the unary node.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    # String-literal token -> handler that wraps the token text in the right node.
    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    # Numeric-literal token -> handler.
    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    # All primary (atomic) expression handlers: literals plus NULL/TRUE/FALSE/star/etc.
    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    # Bind-parameter/placeholder handlers (?, @param, :name / :1).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    # Range/predicate operators that follow a parsed expression (BETWEEN, IN, LIKE, ...).
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    # Keyword (or keyword sequence) -> handler for CREATE/table properties.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Keyword -> handler for column/table constraints.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var(any_token=True)
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <fn> builds an OnUpdate constraint; a bare ON falls back to OnProperty.
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    # Keyword -> handler for ALTER TABLE actions.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
    }

    # Keyword -> handler for ALTER TABLE ... ALTER sub-actions.
    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    # Constraint keywords that may appear in a schema without a CONSTRAINT name.
    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    # Function-like keywords parsed without a parenthesized argument list.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }
INVALID_FUNC_NAME_TOKENS = { 1004 TokenType.IDENTIFIER, 1005 TokenType.STRING, 1006 } 1007 1008 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1009 1010 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1011 1012 FUNCTION_PARSERS = { 1013 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1014 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1015 "DECODE": lambda self: self._parse_decode(), 1016 "EXTRACT": lambda self: self._parse_extract(), 1017 "GAP_FILL": lambda self: self._parse_gap_fill(), 1018 "JSON_OBJECT": lambda self: self._parse_json_object(), 1019 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1020 "JSON_TABLE": lambda self: self._parse_json_table(), 1021 "MATCH": lambda self: self._parse_match_against(), 1022 "OPENJSON": lambda self: self._parse_open_json(), 1023 "POSITION": lambda self: self._parse_position(), 1024 "PREDICT": lambda self: self._parse_predict(), 1025 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1026 "STRING_AGG": lambda self: self._parse_string_agg(), 1027 "SUBSTRING": lambda self: self._parse_substring(), 1028 "TRIM": lambda self: self._parse_trim(), 1029 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1030 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1031 } 1032 1033 QUERY_MODIFIER_PARSERS = { 1034 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1035 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1036 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1037 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1038 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1039 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1040 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1041 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1042 TokenType.LIMIT: lambda self: ("limit", 
self._parse_limit()), 1043 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1044 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1045 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1046 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1047 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1048 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1049 TokenType.CLUSTER_BY: lambda self: ( 1050 "cluster", 1051 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1052 ), 1053 TokenType.DISTRIBUTE_BY: lambda self: ( 1054 "distribute", 1055 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1056 ), 1057 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1058 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1059 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1060 } 1061 1062 SET_PARSERS = { 1063 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1064 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1065 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1066 "TRANSACTION": lambda self: self._parse_set_transaction(), 1067 } 1068 1069 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1070 1071 TYPE_LITERAL_PARSERS = { 1072 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1073 } 1074 1075 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1076 1077 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1078 1079 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1080 1081 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1082 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1083 "ISOLATION": ( 1084 ("LEVEL", "REPEATABLE", "READ"), 1085 ("LEVEL", "READ", "COMMITTED"), 
1086 ("LEVEL", "READ", "UNCOMITTED"), 1087 ("LEVEL", "SERIALIZABLE"), 1088 ), 1089 "READ": ("WRITE", "ONLY"), 1090 } 1091 1092 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1093 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1094 ) 1095 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1096 1097 CREATE_SEQUENCE: OPTIONS_TYPE = { 1098 "SCALE": ("EXTEND", "NOEXTEND"), 1099 "SHARD": ("EXTEND", "NOEXTEND"), 1100 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1101 **dict.fromkeys( 1102 ( 1103 "SESSION", 1104 "GLOBAL", 1105 "KEEP", 1106 "NOKEEP", 1107 "ORDER", 1108 "NOORDER", 1109 "NOCACHE", 1110 "CYCLE", 1111 "NOCYCLE", 1112 "NOMINVALUE", 1113 "NOMAXVALUE", 1114 "NOSCALE", 1115 "NOSHARD", 1116 ), 1117 tuple(), 1118 ), 1119 } 1120 1121 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1122 1123 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1124 1125 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1126 1127 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1128 1129 CLONE_KEYWORDS = {"CLONE", "COPY"} 1130 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1131 1132 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1133 1134 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1135 1136 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1137 1138 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1139 1140 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1141 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1142 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1143 1144 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1145 1146 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1147 1148 ADD_CONSTRAINT_TOKENS = { 1149 TokenType.CONSTRAINT, 1150 TokenType.FOREIGN_KEY, 1151 TokenType.INDEX, 1152 
TokenType.KEY, 1153 TokenType.PRIMARY_KEY, 1154 TokenType.UNIQUE, 1155 } 1156 1157 DISTINCT_TOKENS = {TokenType.DISTINCT} 1158 1159 NULL_TOKENS = {TokenType.NULL} 1160 1161 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1162 1163 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1164 1165 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1166 1167 STRICT_CAST = True 1168 1169 PREFIXED_PIVOT_COLUMNS = False 1170 IDENTIFY_PIVOT_STRINGS = False 1171 1172 LOG_DEFAULTS_TO_LN = False 1173 1174 # Whether ADD is present for each column added by ALTER TABLE 1175 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1176 1177 # Whether the table sample clause expects CSV syntax 1178 TABLESAMPLE_CSV = False 1179 1180 # The default method used for table sampling 1181 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1182 1183 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1184 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1185 1186 # Whether the TRIM function expects the characters to trim as its first argument 1187 TRIM_PATTERN_FIRST = False 1188 1189 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1190 STRING_ALIASES = False 1191 1192 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1193 MODIFIERS_ATTACHED_TO_SET_OP = True 1194 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1195 1196 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1197 NO_PAREN_IF_COMMANDS = True 1198 1199 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1200 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1201 1202 # Whether the `:` operator is used to extract a value from a JSON document 1203 COLON_IS_JSON_EXTRACT = False 1204 1205 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 
1206 # If this is True and '(' is not found, the keyword will be treated as an identifier 1207 VALUES_FOLLOWED_BY_PAREN = True 1208 1209 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1210 SUPPORTS_IMPLICIT_UNNEST = False 1211 1212 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1213 INTERVAL_SPANS = True 1214 1215 # Whether a PARTITION clause can follow a table reference 1216 SUPPORTS_PARTITION_SELECTION = False 1217 1218 __slots__ = ( 1219 "error_level", 1220 "error_message_context", 1221 "max_errors", 1222 "dialect", 1223 "sql", 1224 "errors", 1225 "_tokens", 1226 "_index", 1227 "_curr", 1228 "_next", 1229 "_prev", 1230 "_prev_comments", 1231 ) 1232 1233 # Autofilled 1234 SHOW_TRIE: t.Dict = {} 1235 SET_TRIE: t.Dict = {} 1236 1237 def __init__( 1238 self, 1239 error_level: t.Optional[ErrorLevel] = None, 1240 error_message_context: int = 100, 1241 max_errors: int = 3, 1242 dialect: DialectType = None, 1243 ): 1244 from sqlglot.dialects import Dialect 1245 1246 self.error_level = error_level or ErrorLevel.IMMEDIATE 1247 self.error_message_context = error_message_context 1248 self.max_errors = max_errors 1249 self.dialect = Dialect.get_or_raise(dialect) 1250 self.reset() 1251 1252 def reset(self): 1253 self.sql = "" 1254 self.errors = [] 1255 self._tokens = [] 1256 self._index = 0 1257 self._curr = None 1258 self._next = None 1259 self._prev = None 1260 self._prev_comments = None 1261 1262 def parse( 1263 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1264 ) -> t.List[t.Optional[exp.Expression]]: 1265 """ 1266 Parses a list of tokens and returns a list of syntax trees, one tree 1267 per parsed SQL statement. 1268 1269 Args: 1270 raw_tokens: The list of tokens. 1271 sql: The original SQL string, used to produce helpful debug messages. 1272 1273 Returns: 1274 The list of the produced syntax trees. 
1275 """ 1276 return self._parse( 1277 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1278 ) 1279 1280 def parse_into( 1281 self, 1282 expression_types: exp.IntoType, 1283 raw_tokens: t.List[Token], 1284 sql: t.Optional[str] = None, 1285 ) -> t.List[t.Optional[exp.Expression]]: 1286 """ 1287 Parses a list of tokens into a given Expression type. If a collection of Expression 1288 types is given instead, this method will try to parse the token list into each one 1289 of them, stopping at the first for which the parsing succeeds. 1290 1291 Args: 1292 expression_types: The expression type(s) to try and parse the token list into. 1293 raw_tokens: The list of tokens. 1294 sql: The original SQL string, used to produce helpful debug messages. 1295 1296 Returns: 1297 The target Expression. 1298 """ 1299 errors = [] 1300 for expression_type in ensure_list(expression_types): 1301 parser = self.EXPRESSION_PARSERS.get(expression_type) 1302 if not parser: 1303 raise TypeError(f"No parser registered for {expression_type}") 1304 1305 try: 1306 return self._parse(parser, raw_tokens, sql) 1307 except ParseError as e: 1308 e.errors[0]["into_expression"] = expression_type 1309 errors.append(e) 1310 1311 raise ParseError( 1312 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1313 errors=merge_errors(errors), 1314 ) from errors[-1] 1315 1316 def _parse( 1317 self, 1318 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1319 raw_tokens: t.List[Token], 1320 sql: t.Optional[str] = None, 1321 ) -> t.List[t.Optional[exp.Expression]]: 1322 self.reset() 1323 self.sql = sql or "" 1324 1325 total = len(raw_tokens) 1326 chunks: t.List[t.List[Token]] = [[]] 1327 1328 for i, token in enumerate(raw_tokens): 1329 if token.token_type == TokenType.SEMICOLON: 1330 if token.comments: 1331 chunks.append([token]) 1332 1333 if i < total - 1: 1334 chunks.append([]) 1335 else: 1336 chunks[-1].append(token) 1337 1338 expressions = [] 1339 1340 for 
tokens in chunks: 1341 self._index = -1 1342 self._tokens = tokens 1343 self._advance() 1344 1345 expressions.append(parse_method(self)) 1346 1347 if self._index < len(self._tokens): 1348 self.raise_error("Invalid expression / Unexpected token") 1349 1350 self.check_errors() 1351 1352 return expressions 1353 1354 def check_errors(self) -> None: 1355 """Logs or raises any found errors, depending on the chosen error level setting.""" 1356 if self.error_level == ErrorLevel.WARN: 1357 for error in self.errors: 1358 logger.error(str(error)) 1359 elif self.error_level == ErrorLevel.RAISE and self.errors: 1360 raise ParseError( 1361 concat_messages(self.errors, self.max_errors), 1362 errors=merge_errors(self.errors), 1363 ) 1364 1365 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1366 """ 1367 Appends an error in the list of recorded errors or raises it, depending on the chosen 1368 error level setting. 1369 """ 1370 token = token or self._curr or self._prev or Token.string("") 1371 start = token.start 1372 end = token.end + 1 1373 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1374 highlight = self.sql[start:end] 1375 end_context = self.sql[end : end + self.error_message_context] 1376 1377 error = ParseError.new( 1378 f"{message}. Line {token.line}, Col: {token.col}.\n" 1379 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1380 description=message, 1381 line=token.line, 1382 col=token.col, 1383 start_context=start_context, 1384 highlight=highlight, 1385 end_context=end_context, 1386 ) 1387 1388 if self.error_level == ErrorLevel.IMMEDIATE: 1389 raise error 1390 1391 self.errors.append(error) 1392 1393 def expression( 1394 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1395 ) -> E: 1396 """ 1397 Creates a new, validated Expression. 1398 1399 Args: 1400 exp_class: The expression class to instantiate. 1401 comments: An optional list of comments to attach to the expression. 
1402 kwargs: The arguments to set for the expression along with their respective values. 1403 1404 Returns: 1405 The target expression. 1406 """ 1407 instance = exp_class(**kwargs) 1408 instance.add_comments(comments) if comments else self._add_comments(instance) 1409 return self.validate_expression(instance) 1410 1411 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1412 if expression and self._prev_comments: 1413 expression.add_comments(self._prev_comments) 1414 self._prev_comments = None 1415 1416 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1417 """ 1418 Validates an Expression, making sure that all its mandatory arguments are set. 1419 1420 Args: 1421 expression: The expression to validate. 1422 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1423 1424 Returns: 1425 The validated expression. 1426 """ 1427 if self.error_level != ErrorLevel.IGNORE: 1428 for error_message in expression.error_messages(args): 1429 self.raise_error(error_message) 1430 1431 return expression 1432 1433 def _find_sql(self, start: Token, end: Token) -> str: 1434 return self.sql[start.start : end.end + 1] 1435 1436 def _is_connected(self) -> bool: 1437 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1438 1439 def _advance(self, times: int = 1) -> None: 1440 self._index += times 1441 self._curr = seq_get(self._tokens, self._index) 1442 self._next = seq_get(self._tokens, self._index + 1) 1443 1444 if self._index > 0: 1445 self._prev = self._tokens[self._index - 1] 1446 self._prev_comments = self._prev.comments 1447 else: 1448 self._prev = None 1449 self._prev_comments = None 1450 1451 def _retreat(self, index: int) -> None: 1452 if index != self._index: 1453 self._advance(index - self._index) 1454 1455 def _warn_unsupported(self) -> None: 1456 if len(self._tokens) <= 1: 1457 return 1458 1459 # We use _find_sql because self.sql may comprise multiple chunks, 
and we're only 1460 # interested in emitting a warning for the one being currently processed. 1461 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1462 1463 logger.warning( 1464 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1465 ) 1466 1467 def _parse_command(self) -> exp.Command: 1468 self._warn_unsupported() 1469 return self.expression( 1470 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1471 ) 1472 1473 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1474 """ 1475 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. This behavior can 1476 be different depending on the uset-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1477 the parser state accordingly 1478 """ 1479 index = self._index 1480 error_level = self.error_level 1481 1482 self.error_level = ErrorLevel.IMMEDIATE 1483 try: 1484 this = parse_method() 1485 except ParseError: 1486 this = None 1487 finally: 1488 if not this or retreat: 1489 self._retreat(index) 1490 self.error_level = error_level 1491 1492 return this 1493 1494 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1495 start = self._prev 1496 exists = self._parse_exists() if allow_exists else None 1497 1498 self._match(TokenType.ON) 1499 1500 materialized = self._match_text_seq("MATERIALIZED") 1501 kind = self._match_set(self.CREATABLES) and self._prev 1502 if not kind: 1503 return self._parse_as_command(start) 1504 1505 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1506 this = self._parse_user_defined_function(kind=kind.token_type) 1507 elif kind.token_type == TokenType.TABLE: 1508 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1509 elif kind.token_type == TokenType.COLUMN: 1510 this = self._parse_column() 1511 else: 1512 this = self._parse_id_var() 1513 1514 
self._match(TokenType.IS) 1515 1516 return self.expression( 1517 exp.Comment, 1518 this=this, 1519 kind=kind.text, 1520 expression=self._parse_string(), 1521 exists=exists, 1522 materialized=materialized, 1523 ) 1524 1525 def _parse_to_table( 1526 self, 1527 ) -> exp.ToTableProperty: 1528 table = self._parse_table_parts(schema=True) 1529 return self.expression(exp.ToTableProperty, this=table) 1530 1531 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1532 def _parse_ttl(self) -> exp.Expression: 1533 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1534 this = self._parse_bitwise() 1535 1536 if self._match_text_seq("DELETE"): 1537 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1538 if self._match_text_seq("RECOMPRESS"): 1539 return self.expression( 1540 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1541 ) 1542 if self._match_text_seq("TO", "DISK"): 1543 return self.expression( 1544 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1545 ) 1546 if self._match_text_seq("TO", "VOLUME"): 1547 return self.expression( 1548 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1549 ) 1550 1551 return this 1552 1553 expressions = self._parse_csv(_parse_ttl_action) 1554 where = self._parse_where() 1555 group = self._parse_group() 1556 1557 aggregates = None 1558 if group and self._match(TokenType.SET): 1559 aggregates = self._parse_csv(self._parse_set_item) 1560 1561 return self.expression( 1562 exp.MergeTreeTTL, 1563 expressions=expressions, 1564 where=where, 1565 group=group, 1566 aggregates=aggregates, 1567 ) 1568 1569 def _parse_statement(self) -> t.Optional[exp.Expression]: 1570 if self._curr is None: 1571 return None 1572 1573 if self._match_set(self.STATEMENT_PARSERS): 1574 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1575 1576 if self._match_set(self.dialect.tokenizer.COMMANDS): 1577 return self._parse_command() 1578 1579 
expression = self._parse_expression() 1580 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1581 return self._parse_query_modifiers(expression) 1582 1583 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1584 start = self._prev 1585 temporary = self._match(TokenType.TEMPORARY) 1586 materialized = self._match_text_seq("MATERIALIZED") 1587 1588 kind = self._match_set(self.CREATABLES) and self._prev.text 1589 if not kind: 1590 return self._parse_as_command(start) 1591 1592 if_exists = exists or self._parse_exists() 1593 table = self._parse_table_parts( 1594 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1595 ) 1596 1597 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1598 1599 if self._match(TokenType.L_PAREN, advance=False): 1600 expressions = self._parse_wrapped_csv(self._parse_types) 1601 else: 1602 expressions = None 1603 1604 return self.expression( 1605 exp.Drop, 1606 comments=start.comments, 1607 exists=if_exists, 1608 this=table, 1609 expressions=expressions, 1610 kind=kind.upper(), 1611 temporary=temporary, 1612 materialized=materialized, 1613 cascade=self._match_text_seq("CASCADE"), 1614 constraints=self._match_text_seq("CONSTRAINTS"), 1615 purge=self._match_text_seq("PURGE"), 1616 cluster=cluster, 1617 ) 1618 1619 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1620 return ( 1621 self._match_text_seq("IF") 1622 and (not not_ or self._match(TokenType.NOT)) 1623 and self._match(TokenType.EXISTS) 1624 ) 1625 1626 def _parse_create(self) -> exp.Create | exp.Command: 1627 # Note: this can't be None because we've matched a statement parser 1628 start = self._prev 1629 comments = self._prev_comments 1630 1631 replace = ( 1632 start.token_type == TokenType.REPLACE 1633 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1634 or self._match_pair(TokenType.OR, TokenType.ALTER) 1635 ) 1636 1637 unique = self._match(TokenType.UNIQUE) 1638 
1639 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1640 self._advance() 1641 1642 properties = None 1643 create_token = self._match_set(self.CREATABLES) and self._prev 1644 1645 if not create_token: 1646 # exp.Properties.Location.POST_CREATE 1647 properties = self._parse_properties() 1648 create_token = self._match_set(self.CREATABLES) and self._prev 1649 1650 if not properties or not create_token: 1651 return self._parse_as_command(start) 1652 1653 exists = self._parse_exists(not_=True) 1654 this = None 1655 expression: t.Optional[exp.Expression] = None 1656 indexes = None 1657 no_schema_binding = None 1658 begin = None 1659 end = None 1660 clone = None 1661 1662 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1663 nonlocal properties 1664 if properties and temp_props: 1665 properties.expressions.extend(temp_props.expressions) 1666 elif temp_props: 1667 properties = temp_props 1668 1669 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1670 this = self._parse_user_defined_function(kind=create_token.token_type) 1671 1672 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1673 extend_props(self._parse_properties()) 1674 1675 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1676 extend_props(self._parse_properties()) 1677 1678 if not expression: 1679 if self._match(TokenType.COMMAND): 1680 expression = self._parse_as_command(self._prev) 1681 else: 1682 begin = self._match(TokenType.BEGIN) 1683 return_ = self._match_text_seq("RETURN") 1684 1685 if self._match(TokenType.STRING, advance=False): 1686 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1687 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1688 expression = self._parse_string() 1689 extend_props(self._parse_properties()) 1690 else: 1691 expression = self._parse_statement() 1692 1693 end = 
self._match_text_seq("END") 1694 1695 if return_: 1696 expression = self.expression(exp.Return, this=expression) 1697 elif create_token.token_type == TokenType.INDEX: 1698 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1699 if not self._match(TokenType.ON): 1700 index = self._parse_id_var() 1701 anonymous = False 1702 else: 1703 index = None 1704 anonymous = True 1705 1706 this = self._parse_index(index=index, anonymous=anonymous) 1707 elif create_token.token_type in self.DB_CREATABLES: 1708 table_parts = self._parse_table_parts( 1709 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1710 ) 1711 1712 # exp.Properties.Location.POST_NAME 1713 self._match(TokenType.COMMA) 1714 extend_props(self._parse_properties(before=True)) 1715 1716 this = self._parse_schema(this=table_parts) 1717 1718 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1719 extend_props(self._parse_properties()) 1720 1721 self._match(TokenType.ALIAS) 1722 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1723 # exp.Properties.Location.POST_ALIAS 1724 extend_props(self._parse_properties()) 1725 1726 if create_token.token_type == TokenType.SEQUENCE: 1727 expression = self._parse_types() 1728 extend_props(self._parse_properties()) 1729 else: 1730 expression = self._parse_ddl_select() 1731 1732 if create_token.token_type == TokenType.TABLE: 1733 # exp.Properties.Location.POST_EXPRESSION 1734 extend_props(self._parse_properties()) 1735 1736 indexes = [] 1737 while True: 1738 index = self._parse_index() 1739 1740 # exp.Properties.Location.POST_INDEX 1741 extend_props(self._parse_properties()) 1742 1743 if not index: 1744 break 1745 else: 1746 self._match(TokenType.COMMA) 1747 indexes.append(index) 1748 elif create_token.token_type == TokenType.VIEW: 1749 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1750 no_schema_binding = True 1751 1752 shallow = self._match_text_seq("SHALLOW") 1753 1754 if 
self._match_texts(self.CLONE_KEYWORDS): 1755 copy = self._prev.text.lower() == "copy" 1756 clone = self.expression( 1757 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1758 ) 1759 1760 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1761 return self._parse_as_command(start) 1762 1763 return self.expression( 1764 exp.Create, 1765 comments=comments, 1766 this=this, 1767 kind=create_token.text.upper(), 1768 replace=replace, 1769 unique=unique, 1770 expression=expression, 1771 exists=exists, 1772 properties=properties, 1773 indexes=indexes, 1774 no_schema_binding=no_schema_binding, 1775 begin=begin, 1776 end=end, 1777 clone=clone, 1778 ) 1779 1780 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1781 seq = exp.SequenceProperties() 1782 1783 options = [] 1784 index = self._index 1785 1786 while self._curr: 1787 self._match(TokenType.COMMA) 1788 if self._match_text_seq("INCREMENT"): 1789 self._match_text_seq("BY") 1790 self._match_text_seq("=") 1791 seq.set("increment", self._parse_term()) 1792 elif self._match_text_seq("MINVALUE"): 1793 seq.set("minvalue", self._parse_term()) 1794 elif self._match_text_seq("MAXVALUE"): 1795 seq.set("maxvalue", self._parse_term()) 1796 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1797 self._match_text_seq("=") 1798 seq.set("start", self._parse_term()) 1799 elif self._match_text_seq("CACHE"): 1800 # T-SQL allows empty CACHE which is initialized dynamically 1801 seq.set("cache", self._parse_number() or True) 1802 elif self._match_text_seq("OWNED", "BY"): 1803 # "OWNED BY NONE" is the default 1804 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1805 else: 1806 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1807 if opt: 1808 options.append(opt) 1809 else: 1810 break 1811 1812 seq.set("options", options if options else None) 1813 return None if 
self._index == index else seq 1814 1815 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1816 # only used for teradata currently 1817 self._match(TokenType.COMMA) 1818 1819 kwargs = { 1820 "no": self._match_text_seq("NO"), 1821 "dual": self._match_text_seq("DUAL"), 1822 "before": self._match_text_seq("BEFORE"), 1823 "default": self._match_text_seq("DEFAULT"), 1824 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1825 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1826 "after": self._match_text_seq("AFTER"), 1827 "minimum": self._match_texts(("MIN", "MINIMUM")), 1828 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1829 } 1830 1831 if self._match_texts(self.PROPERTY_PARSERS): 1832 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1833 try: 1834 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1835 except TypeError: 1836 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1837 1838 return None 1839 1840 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1841 return self._parse_wrapped_csv(self._parse_property) 1842 1843 def _parse_property(self) -> t.Optional[exp.Expression]: 1844 if self._match_texts(self.PROPERTY_PARSERS): 1845 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1846 1847 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1848 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1849 1850 if self._match_text_seq("COMPOUND", "SORTKEY"): 1851 return self._parse_sortkey(compound=True) 1852 1853 if self._match_text_seq("SQL", "SECURITY"): 1854 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1855 1856 index = self._index 1857 key = self._parse_column() 1858 1859 if not self._match(TokenType.EQ): 1860 self._retreat(index) 1861 return self._parse_sequence_properties() 1862 1863 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1864 
if isinstance(key, exp.Column): 1865 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1866 1867 value = self._parse_bitwise() or self._parse_var(any_token=True) 1868 1869 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1870 if isinstance(value, exp.Column): 1871 value = exp.var(value.name) 1872 1873 return self.expression(exp.Property, this=key, value=value) 1874 1875 def _parse_stored(self) -> exp.FileFormatProperty: 1876 self._match(TokenType.ALIAS) 1877 1878 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1879 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1880 1881 return self.expression( 1882 exp.FileFormatProperty, 1883 this=( 1884 self.expression( 1885 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1886 ) 1887 if input_format or output_format 1888 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1889 ), 1890 ) 1891 1892 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1893 field = self._parse_field() 1894 if isinstance(field, exp.Identifier) and not field.quoted: 1895 field = exp.var(field) 1896 1897 return field 1898 1899 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1900 self._match(TokenType.EQ) 1901 self._match(TokenType.ALIAS) 1902 1903 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1904 1905 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1906 properties = [] 1907 while True: 1908 if before: 1909 prop = self._parse_property_before() 1910 else: 1911 prop = self._parse_property() 1912 if not prop: 1913 break 1914 for p in ensure_list(prop): 1915 properties.append(p) 1916 1917 if properties: 1918 return self.expression(exp.Properties, expressions=properties) 1919 1920 return None 1921 1922 def _parse_fallback(self, no: bool = False) -> 
def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
    """Disambiguate VOLATILE: a table property vs. a stability marker."""
    # Inspect the token two positions back (the one preceding VOLATILE).
    pre_volatile_token = self._tokens[self._index - 2] if self._index >= 2 else None

    if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
        return exp.VolatileProperty()

    return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
    """Parse MySQL's DEFINER = user@host clause; None if either part is missing."""
    self._match(TokenType.EQ)

    user = self._parse_id_var()
    self._match(TokenType.PARAMETER)  # the '@' separator between user and host
    # The host is an identifier, or a bare '%' wildcard token.
    host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

    if user and host:
        return exp.DefinerProperty(this=f"{user}@{host}")

    return None
def _parse_checksum(self) -> exp.ChecksumProperty:
    """Parse a CHECKSUM property: ON, OFF, or neither, plus optional DEFAULT."""
    self._match(TokenType.EQ)

    if self._match(TokenType.ON):
        on: t.Optional[bool] = True
    elif self._match_text_seq("OFF"):
        on = False
    else:
        on = None

    # DEFAULT may trail the ON/OFF setting, so probe for it last.
    return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
def _parse_datablocksize(
    self,
    default: t.Optional[bool] = None,
    minimum: t.Optional[bool] = None,
    maximum: t.Optional[bool] = None,
) -> exp.DataBlocksizeProperty:
    """Parse a DATABLOCKSIZE property: a size with an optional units keyword.

    The default/minimum/maximum flags are supplied by the caller, which has
    already consumed those leading keywords.
    """
    self._match(TokenType.EQ)
    size = self._parse_number()

    # Record the exact spelling of the units keyword when one is present.
    units = self._prev.text if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")) else None

    return self.expression(
        exp.DataBlocksizeProperty,
        size=size,
        units=units,
        default=default,
        minimum=minimum,
        maximum=maximum,
    )
def _parse_locking(self) -> exp.LockingProperty:
    """Parse a LOCKING clause: object kind, target, FOR/IN, lock type, OVERRIDE."""
    # Object kind: the token probes must run in this order since each
    # successful match consumes the token.
    kind: t.Optional[str] = None
    for token_type, kind_name in (
        (TokenType.TABLE, "TABLE"),
        (TokenType.VIEW, "VIEW"),
        (TokenType.ROW, "ROW"),
    ):
        if self._match(token_type):
            kind = kind_name
            break
    else:
        if self._match_text_seq("DATABASE"):
            kind = "DATABASE"

    # Named objects carry an explicit target; ROW locks do not.
    this = self._parse_table_parts() if kind in ("DATABASE", "TABLE", "VIEW") else None

    if self._match(TokenType.FOR):
        for_or_in: t.Optional[str] = "FOR"
    elif self._match(TokenType.IN):
        for_or_in = "IN"
    else:
        for_or_in = None

    if self._match_text_seq("ACCESS"):
        lock_type: t.Optional[str] = "ACCESS"
    elif self._match_texts(("EXCL", "EXCLUSIVE")):
        lock_type = "EXCLUSIVE"
    elif self._match_text_seq("SHARE"):
        lock_type = "SHARE"
    elif self._match_text_seq("READ"):
        lock_type = "READ"
    elif self._match_text_seq("WRITE"):
        lock_type = "WRITE"
    elif self._match_text_seq("CHECKSUM"):
        lock_type = "CHECKSUM"
    else:
        lock_type = None

    return self.expression(
        exp.LockingProperty,
        this=this,
        kind=kind,
        for_or_in=for_or_in,
        lock_type=lock_type,
        override=self._match_text_seq("OVERRIDE"),
    )
# https://www.postgresql.org/docs/current/sql-createtable.html
def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
    """Parse Postgres' PARTITION OF parent { DEFAULT | FOR VALUES ... }.

    Returns ``None`` (after retreating) when the OF keyword is absent,
    i.e. this is some other PARTITION construct.
    """
    if not self._match_text_seq("OF"):
        # Give back the keyword consumed before dispatching here.
        self._retreat(self._index - 1)
        return None

    this = self._parse_table(schema=True)

    # Pre-initialize so that a non-raising error level (IGNORE/WARN) cannot
    # leave `expression` unbound and trigger an UnboundLocalError below.
    expression: t.Optional[exp.Var | exp.PartitionBoundSpec] = None
    if self._match(TokenType.DEFAULT):
        expression = exp.var("DEFAULT")
    elif self._match_text_seq("FOR", "VALUES"):
        expression = self._parse_partition_bound_spec()
    else:
        self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

    return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)
def _parse_on_property(self) -> t.Optional[exp.Expression]:
    """Parse ON COMMIT {PRESERVE | DELETE} ROWS, else a generic ON property."""
    for rest, is_delete in ((("PRESERVE", "ROWS"), False), (("DELETE", "ROWS"), True)):
        if self._match_text_seq("COMMIT", *rest):
            return exp.OnCommitProperty(delete=True) if is_delete else exp.OnCommitProperty()

    # Fall back to ON <schema> for anything that is not an ON COMMIT option.
    return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))
def _parse_returns(self) -> exp.ReturnsProperty:
    """Parse a RETURNS clause: a scalar type, a TABLE shape, or NULL ON NULL INPUT."""
    value: t.Optional[exp.Expression]
    null = None
    is_table = self._match(TokenType.TABLE)

    if is_table:
        if self._match(TokenType.LT):
            # RETURNS TABLE<...>: an angle-bracketed column list.
            value = self.expression(
                exp.Schema,
                this="TABLE",
                expressions=self._parse_csv(self._parse_struct_types),
            )
            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")
        else:
            # RETURNS TABLE (...): a parenthesized schema.
            value = self._parse_schema(exp.var("TABLE"))
    elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
        # No return type in this form; only the null-handling flag is set.
        null = True
        value = None
    else:
        # Plain scalar return type.
        value = self._parse_types()

    return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)
def _parse_insert(self) -> exp.Insert:
    """Parse an INSERT statement (including INSERT OVERWRITE DIRECTORY forms).

    NOTE: the keyword-argument expressions in the final `self.expression`
    call are evaluated in order and each consumes tokens — do not reorder.
    """
    comments = ensure_list(self._prev_comments)
    hint = self._parse_hint()
    overwrite = self._match(TokenType.OVERWRITE)
    ignore = self._match(TokenType.IGNORE)
    local = self._match_text_seq("LOCAL")
    alternative = None
    is_function = None

    if self._match_text_seq("DIRECTORY"):
        # INSERT ... DIRECTORY 'path': the target is a directory, not a table.
        this: t.Optional[exp.Expression] = self.expression(
            exp.Directory,
            this=self._parse_var_or_string(),
            local=local,
            row_format=self._parse_row_format(match_row=True),
        )
    else:
        # INSERT OR <alternative> (e.g. sqlite-style conflict handling).
        if self._match(TokenType.OR):
            alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

        self._match(TokenType.INTO)
        # Comments attached to INTO belong to the statement as well.
        comments += ensure_list(self._prev_comments)
        self._match(TokenType.TABLE)
        is_function = self._match(TokenType.FUNCTION)

        this = (
            self._parse_table(schema=True, parse_partition=True)
            if not is_function
            else self._parse_function()
        )

    returning = self._parse_returning()

    return self.expression(
        exp.Insert,
        comments=comments,
        hint=hint,
        is_function=is_function,
        this=this,
        stored=self._match_text_seq("STORED") and self._parse_stored(),
        by_name=self._match_text_seq("BY", "NAME"),
        exists=self._parse_exists(),
        where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
        expression=self._parse_derived_table_values() or self._parse_ddl_select(),
        conflict=self._parse_on_conflict(),
        # RETURNING may appear either before or after the source expression.
        returning=returning or self._parse_returning(),
        overwrite=overwrite,
        alternative=alternative,
        ignore=ignore,
    )
def _parse_returning(self) -> t.Optional[exp.Returning]:
    """Parse a RETURNING clause, with an optional INTO target."""
    if not self._match(TokenType.RETURNING):
        return None

    returned = self._parse_csv(self._parse_expression)
    into = self._match(TokenType.INTO) and self._parse_table_part()
    return self.expression(exp.Returning, expressions=returned, into=into)
def _parse_row_format(
    self, match_row: bool = False
) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
    """Parse a ROW FORMAT clause: SERDE '...' or DELIMITED with separators.

    Args:
        match_row: when True, require the leading ROW FORMAT keyword pair.
    """
    if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
        return None

    if self._match_text_seq("SERDE"):
        serde_name = self._parse_string()
        serde_properties = self._parse_serde_properties()
        return self.expression(
            exp.RowFormatSerdeProperty, this=serde_name, serde_properties=serde_properties
        )

    self._match_text_seq("DELIMITED")

    # Each optional clause maps to one argument; the probes run in this
    # fixed order, mirroring the original if-chain.
    kwargs = {}
    for words, arg in (
        (("FIELDS", "TERMINATED", "BY"), "fields"),
        (("ESCAPED", "BY"), "escaped"),
        (("COLLECTION", "ITEMS", "TERMINATED", "BY"), "collection_items"),
        (("MAP", "KEYS", "TERMINATED", "BY"), "map_keys"),
        (("LINES", "TERMINATED", "BY"), "lines"),
        (("NULL", "DEFINED", "AS"), "null"),
    ):
        if self._match_text_seq(*words):
            kwargs[arg] = self._parse_string()

    return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
def _parse_update(self) -> exp.Update:
    """Parse an UPDATE statement."""
    comments = self._prev_comments
    target = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
    assignments = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
    returning = self._parse_returning()

    # The dict literal evaluates (and therefore parses) clauses in grammar
    # order; "from" is a reserved word, hence the kwargs dict.
    kwargs = {
        "this": target,
        "expressions": assignments,
        "from": self._parse_from(joins=True),
        "where": self._parse_where(),
        # RETURNING may appear before or after the WHERE clause.
        "returning": returning or self._parse_returning(),
        "order": self._parse_order(),
        "limit": self._parse_limit(),
    }
    return self.expression(exp.Update, comments=comments, **kwargs)  # type: ignore
def _parse_value(self) -> t.Optional[exp.Tuple]:
    """Parse one row of a VALUES list as a Tuple."""
    if self._match(TokenType.L_PAREN):
        row = self._parse_csv(self._parse_expression)
        self._match_r_paren()
        return self.expression(exp.Tuple, expressions=row)

    # Some dialects allow bare VALUES 1, 2 (one column, two rows), so a
    # single unparenthesized expression still forms a one-item tuple.
    single = self._parse_expression()
    return self.expression(exp.Tuple, expressions=[single]) if single else None
self._match(TokenType.SELECT): 2695 comments = self._prev_comments 2696 2697 hint = self._parse_hint() 2698 all_ = self._match(TokenType.ALL) 2699 distinct = self._match_set(self.DISTINCT_TOKENS) 2700 2701 kind = ( 2702 self._match(TokenType.ALIAS) 2703 and self._match_texts(("STRUCT", "VALUE")) 2704 and self._prev.text.upper() 2705 ) 2706 2707 if distinct: 2708 distinct = self.expression( 2709 exp.Distinct, 2710 on=self._parse_value() if self._match(TokenType.ON) else None, 2711 ) 2712 2713 if all_ and distinct: 2714 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2715 2716 limit = self._parse_limit(top=True) 2717 projections = self._parse_projections() 2718 2719 this = self.expression( 2720 exp.Select, 2721 kind=kind, 2722 hint=hint, 2723 distinct=distinct, 2724 expressions=projections, 2725 limit=limit, 2726 ) 2727 this.comments = comments 2728 2729 into = self._parse_into() 2730 if into: 2731 this.set("into", into) 2732 2733 if not from_: 2734 from_ = self._parse_from() 2735 2736 if from_: 2737 this.set("from", from_) 2738 2739 this = self._parse_query_modifiers(this) 2740 elif (table or nested) and self._match(TokenType.L_PAREN): 2741 if self._match(TokenType.PIVOT): 2742 this = self._parse_simplified_pivot() 2743 elif self._match(TokenType.FROM): 2744 this = exp.select("*").from_( 2745 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2746 ) 2747 else: 2748 this = ( 2749 self._parse_table() 2750 if table 2751 else self._parse_select(nested=True, parse_set_operation=False) 2752 ) 2753 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2754 2755 self._match_r_paren() 2756 2757 # We return early here so that the UNION isn't attached to the subquery by the 2758 # following call to _parse_set_operations, but instead becomes the parent node 2759 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2760 elif self._match(TokenType.VALUES, advance=False): 2761 this = self._parse_derived_table_values() 2762 
def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
    """Parse a WITH (CTE) clause.

    Args:
        skip_with_token: when True, the WITH keyword was already consumed.
    """
    if not skip_with_token and not self._match(TokenType.WITH):
        return None

    comments = self._prev_comments
    recursive = self._match(TokenType.RECURSIVE)

    expressions = [self._parse_cte()]
    # CTEs are normally comma-separated, but a stray WITH is tolerated as a
    # separator on its own and also immediately after a comma.
    while self._match(TokenType.COMMA) or self._match(TokenType.WITH):
        self._match(TokenType.WITH)
        expressions.append(self._parse_cte())

    return self.expression(
        exp.With, comments=comments, expressions=expressions, recursive=recursive
    )
def _parse_subquery(
    self, this: t.Optional[exp.Expression], parse_alias: bool = True
) -> t.Optional[exp.Subquery]:
    """Wrap a parsed query in a Subquery node, attaching pivots and an alias."""
    if not this:
        return None

    pivots = self._parse_pivots()
    alias = self._parse_table_alias() if parse_alias else None
    return self.expression(exp.Subquery, this=this, pivots=pivots, alias=alias)
def _parse_into(self) -> t.Optional[exp.Into]:
    """Parse SELECT ... INTO [TEMPORARY | UNLOGGED] [TABLE] target."""
    if not self._match(TokenType.INTO):
        return None

    is_temporary = self._match(TokenType.TEMPORARY)
    is_unlogged = self._match_text_seq("UNLOGGED")
    self._match(TokenType.TABLE)  # the TABLE keyword is optional here

    target = self._parse_table(schema=True)
    return self.expression(exp.Into, this=target, temporary=is_temporary, unlogged=is_unlogged)
comments=self._prev_comments, this=self._parse_table(joins=joins) 2951 ) 2952 2953 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2954 return self.expression( 2955 exp.MatchRecognizeMeasure, 2956 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2957 this=self._parse_expression(), 2958 ) 2959 2960 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2961 if not self._match(TokenType.MATCH_RECOGNIZE): 2962 return None 2963 2964 self._match_l_paren() 2965 2966 partition = self._parse_partition_by() 2967 order = self._parse_order() 2968 2969 measures = ( 2970 self._parse_csv(self._parse_match_recognize_measure) 2971 if self._match_text_seq("MEASURES") 2972 else None 2973 ) 2974 2975 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2976 rows = exp.var("ONE ROW PER MATCH") 2977 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2978 text = "ALL ROWS PER MATCH" 2979 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2980 text += " SHOW EMPTY MATCHES" 2981 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2982 text += " OMIT EMPTY MATCHES" 2983 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2984 text += " WITH UNMATCHED ROWS" 2985 rows = exp.var(text) 2986 else: 2987 rows = None 2988 2989 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2990 text = "AFTER MATCH SKIP" 2991 if self._match_text_seq("PAST", "LAST", "ROW"): 2992 text += " PAST LAST ROW" 2993 elif self._match_text_seq("TO", "NEXT", "ROW"): 2994 text += " TO NEXT ROW" 2995 elif self._match_text_seq("TO", "FIRST"): 2996 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2997 elif self._match_text_seq("TO", "LAST"): 2998 text += f" TO LAST {self._advance_any().text}" # type: ignore 2999 after = exp.var(text) 3000 else: 3001 after = None 3002 3003 if self._match_text_seq("PATTERN"): 3004 self._match_l_paren() 3005 3006 if not self._curr: 3007 self.raise_error("Expecting )", self._curr) 3008 3009 paren = 
1 3010 start = self._curr 3011 3012 while self._curr and paren > 0: 3013 if self._curr.token_type == TokenType.L_PAREN: 3014 paren += 1 3015 if self._curr.token_type == TokenType.R_PAREN: 3016 paren -= 1 3017 3018 end = self._prev 3019 self._advance() 3020 3021 if paren > 0: 3022 self.raise_error("Expecting )", self._curr) 3023 3024 pattern = exp.var(self._find_sql(start, end)) 3025 else: 3026 pattern = None 3027 3028 define = ( 3029 self._parse_csv(self._parse_name_as_expression) 3030 if self._match_text_seq("DEFINE") 3031 else None 3032 ) 3033 3034 self._match_r_paren() 3035 3036 return self.expression( 3037 exp.MatchRecognize, 3038 partition_by=partition, 3039 order=order, 3040 measures=measures, 3041 rows=rows, 3042 after=after, 3043 pattern=pattern, 3044 define=define, 3045 alias=self._parse_table_alias(), 3046 ) 3047 3048 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3049 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3050 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3051 cross_apply = False 3052 3053 if cross_apply is not None: 3054 this = self._parse_select(table=True) 3055 view = None 3056 outer = None 3057 elif self._match(TokenType.LATERAL): 3058 this = self._parse_select(table=True) 3059 view = self._match(TokenType.VIEW) 3060 outer = self._match(TokenType.OUTER) 3061 else: 3062 return None 3063 3064 if not this: 3065 this = ( 3066 self._parse_unnest() 3067 or self._parse_function() 3068 or self._parse_id_var(any_token=False) 3069 ) 3070 3071 while self._match(TokenType.DOT): 3072 this = exp.Dot( 3073 this=this, 3074 expression=self._parse_function() or self._parse_id_var(any_token=False), 3075 ) 3076 3077 if view: 3078 table = self._parse_id_var(any_token=False) 3079 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3080 table_alias: t.Optional[exp.TableAlias] = self.expression( 3081 exp.TableAlias, this=table, columns=columns 3082 ) 3083 elif isinstance(this, 
(exp.Subquery, exp.Unnest)) and this.alias: 3084 # We move the alias from the lateral's child node to the lateral itself 3085 table_alias = this.args["alias"].pop() 3086 else: 3087 table_alias = self._parse_table_alias() 3088 3089 return self.expression( 3090 exp.Lateral, 3091 this=this, 3092 view=view, 3093 outer=outer, 3094 alias=table_alias, 3095 cross_apply=cross_apply, 3096 ) 3097 3098 def _parse_join_parts( 3099 self, 3100 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3101 return ( 3102 self._match_set(self.JOIN_METHODS) and self._prev, 3103 self._match_set(self.JOIN_SIDES) and self._prev, 3104 self._match_set(self.JOIN_KINDS) and self._prev, 3105 ) 3106 3107 def _parse_join( 3108 self, skip_join_token: bool = False, parse_bracket: bool = False 3109 ) -> t.Optional[exp.Join]: 3110 if self._match(TokenType.COMMA): 3111 return self.expression(exp.Join, this=self._parse_table()) 3112 3113 index = self._index 3114 method, side, kind = self._parse_join_parts() 3115 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3116 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3117 3118 if not skip_join_token and not join: 3119 self._retreat(index) 3120 kind = None 3121 method = None 3122 side = None 3123 3124 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3125 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3126 3127 if not skip_join_token and not join and not outer_apply and not cross_apply: 3128 return None 3129 3130 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3131 3132 if method: 3133 kwargs["method"] = method.text 3134 if side: 3135 kwargs["side"] = side.text 3136 if kind: 3137 kwargs["kind"] = kind.text 3138 if hint: 3139 kwargs["hint"] = hint 3140 3141 if self._match(TokenType.MATCH_CONDITION): 3142 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3143 3144 if 
self._match(TokenType.ON): 3145 kwargs["on"] = self._parse_assignment() 3146 elif self._match(TokenType.USING): 3147 kwargs["using"] = self._parse_wrapped_id_vars() 3148 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3149 kind and kind.token_type == TokenType.CROSS 3150 ): 3151 index = self._index 3152 joins: t.Optional[list] = list(self._parse_joins()) 3153 3154 if joins and self._match(TokenType.ON): 3155 kwargs["on"] = self._parse_assignment() 3156 elif joins and self._match(TokenType.USING): 3157 kwargs["using"] = self._parse_wrapped_id_vars() 3158 else: 3159 joins = None 3160 self._retreat(index) 3161 3162 kwargs["this"].set("joins", joins if joins else None) 3163 3164 comments = [c for token in (method, side, kind) if token for c in token.comments] 3165 return self.expression(exp.Join, comments=comments, **kwargs) 3166 3167 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3168 this = self._parse_assignment() 3169 3170 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3171 return this 3172 3173 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3174 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3175 3176 return this 3177 3178 def _parse_index_params(self) -> exp.IndexParameters: 3179 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3180 3181 if self._match(TokenType.L_PAREN, advance=False): 3182 columns = self._parse_wrapped_csv(self._parse_with_operator) 3183 else: 3184 columns = None 3185 3186 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3187 partition_by = self._parse_partition_by() 3188 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3189 tablespace = ( 3190 self._parse_var(any_token=True) 3191 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3192 else None 3193 ) 3194 where = self._parse_where() 3195 3196 on = self._parse_field() if self._match(TokenType.ON) 
else None 3197 3198 return self.expression( 3199 exp.IndexParameters, 3200 using=using, 3201 columns=columns, 3202 include=include, 3203 partition_by=partition_by, 3204 where=where, 3205 with_storage=with_storage, 3206 tablespace=tablespace, 3207 on=on, 3208 ) 3209 3210 def _parse_index( 3211 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3212 ) -> t.Optional[exp.Index]: 3213 if index or anonymous: 3214 unique = None 3215 primary = None 3216 amp = None 3217 3218 self._match(TokenType.ON) 3219 self._match(TokenType.TABLE) # hive 3220 table = self._parse_table_parts(schema=True) 3221 else: 3222 unique = self._match(TokenType.UNIQUE) 3223 primary = self._match_text_seq("PRIMARY") 3224 amp = self._match_text_seq("AMP") 3225 3226 if not self._match(TokenType.INDEX): 3227 return None 3228 3229 index = self._parse_id_var() 3230 table = None 3231 3232 params = self._parse_index_params() 3233 3234 return self.expression( 3235 exp.Index, 3236 this=index, 3237 table=table, 3238 unique=unique, 3239 primary=primary, 3240 amp=amp, 3241 params=params, 3242 ) 3243 3244 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3245 hints: t.List[exp.Expression] = [] 3246 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3247 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3248 hints.append( 3249 self.expression( 3250 exp.WithTableHint, 3251 expressions=self._parse_csv( 3252 lambda: self._parse_function() or self._parse_var(any_token=True) 3253 ), 3254 ) 3255 ) 3256 self._match_r_paren() 3257 else: 3258 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3259 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3260 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3261 3262 self._match_set((TokenType.INDEX, TokenType.KEY)) 3263 if self._match(TokenType.FOR): 3264 hint.set("target", self._advance_any() and self._prev.text.upper()) 3265 3266 hint.set("expressions", 
self._parse_wrapped_id_vars()) 3267 hints.append(hint) 3268 3269 return hints or None 3270 3271 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3272 return ( 3273 (not schema and self._parse_function(optional_parens=False)) 3274 or self._parse_id_var(any_token=False) 3275 or self._parse_string_as_identifier() 3276 or self._parse_placeholder() 3277 ) 3278 3279 def _parse_table_parts( 3280 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3281 ) -> exp.Table: 3282 catalog = None 3283 db = None 3284 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3285 3286 while self._match(TokenType.DOT): 3287 if catalog: 3288 # This allows nesting the table in arbitrarily many dot expressions if needed 3289 table = self.expression( 3290 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3291 ) 3292 else: 3293 catalog = db 3294 db = table 3295 # "" used for tsql FROM a..b case 3296 table = self._parse_table_part(schema=schema) or "" 3297 3298 if ( 3299 wildcard 3300 and self._is_connected() 3301 and (isinstance(table, exp.Identifier) or not table) 3302 and self._match(TokenType.STAR) 3303 ): 3304 if isinstance(table, exp.Identifier): 3305 table.args["this"] += "*" 3306 else: 3307 table = exp.Identifier(this="*") 3308 3309 # We bubble up comments from the Identifier to the Table 3310 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3311 3312 if is_db_reference: 3313 catalog = db 3314 db = table 3315 table = None 3316 3317 if not table and not is_db_reference: 3318 self.raise_error(f"Expected table name but got {self._curr}") 3319 if not db and is_db_reference: 3320 self.raise_error(f"Expected database name but got {self._curr}") 3321 3322 return self.expression( 3323 exp.Table, 3324 comments=comments, 3325 this=table, 3326 db=db, 3327 catalog=catalog, 3328 pivots=self._parse_pivots(), 3329 ) 3330 3331 def _parse_table( 3332 self, 3333 
schema: bool = False, 3334 joins: bool = False, 3335 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3336 parse_bracket: bool = False, 3337 is_db_reference: bool = False, 3338 parse_partition: bool = False, 3339 ) -> t.Optional[exp.Expression]: 3340 lateral = self._parse_lateral() 3341 if lateral: 3342 return lateral 3343 3344 unnest = self._parse_unnest() 3345 if unnest: 3346 return unnest 3347 3348 values = self._parse_derived_table_values() 3349 if values: 3350 return values 3351 3352 subquery = self._parse_select(table=True) 3353 if subquery: 3354 if not subquery.args.get("pivots"): 3355 subquery.set("pivots", self._parse_pivots()) 3356 return subquery 3357 3358 bracket = parse_bracket and self._parse_bracket(None) 3359 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3360 3361 only = self._match(TokenType.ONLY) 3362 3363 this = t.cast( 3364 exp.Expression, 3365 bracket 3366 or self._parse_bracket( 3367 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3368 ), 3369 ) 3370 3371 if only: 3372 this.set("only", only) 3373 3374 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3375 self._match_text_seq("*") 3376 3377 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3378 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3379 this.set("partition", self._parse_partition()) 3380 3381 if schema: 3382 return self._parse_schema(this=this) 3383 3384 version = self._parse_version() 3385 3386 if version: 3387 this.set("version", version) 3388 3389 if self.dialect.ALIAS_POST_TABLESAMPLE: 3390 table_sample = self._parse_table_sample() 3391 3392 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3393 if alias: 3394 this.set("alias", alias) 3395 3396 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3397 return self.expression( 3398 exp.AtIndex, this=this.to_column(copy=False), 
expression=self._parse_id_var() 3399 ) 3400 3401 this.set("hints", self._parse_table_hints()) 3402 3403 if not this.args.get("pivots"): 3404 this.set("pivots", self._parse_pivots()) 3405 3406 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3407 table_sample = self._parse_table_sample() 3408 3409 if table_sample: 3410 table_sample.set("this", this) 3411 this = table_sample 3412 3413 if joins: 3414 for join in self._parse_joins(): 3415 this.append("joins", join) 3416 3417 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3418 this.set("ordinality", True) 3419 this.set("alias", self._parse_table_alias()) 3420 3421 return this 3422 3423 def _parse_version(self) -> t.Optional[exp.Version]: 3424 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3425 this = "TIMESTAMP" 3426 elif self._match(TokenType.VERSION_SNAPSHOT): 3427 this = "VERSION" 3428 else: 3429 return None 3430 3431 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3432 kind = self._prev.text.upper() 3433 start = self._parse_bitwise() 3434 self._match_texts(("TO", "AND")) 3435 end = self._parse_bitwise() 3436 expression: t.Optional[exp.Expression] = self.expression( 3437 exp.Tuple, expressions=[start, end] 3438 ) 3439 elif self._match_text_seq("CONTAINED", "IN"): 3440 kind = "CONTAINED IN" 3441 expression = self.expression( 3442 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3443 ) 3444 elif self._match(TokenType.ALL): 3445 kind = "ALL" 3446 expression = None 3447 else: 3448 self._match_text_seq("AS", "OF") 3449 kind = "AS OF" 3450 expression = self._parse_type() 3451 3452 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3453 3454 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3455 if not self._match(TokenType.UNNEST): 3456 return None 3457 3458 expressions = self._parse_wrapped_csv(self._parse_equality) 3459 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3460 3461 alias = self._parse_table_alias() if 
with_alias else None 3462 3463 if alias: 3464 if self.dialect.UNNEST_COLUMN_ONLY: 3465 if alias.args.get("columns"): 3466 self.raise_error("Unexpected extra column alias in unnest.") 3467 3468 alias.set("columns", [alias.this]) 3469 alias.set("this", None) 3470 3471 columns = alias.args.get("columns") or [] 3472 if offset and len(expressions) < len(columns): 3473 offset = columns.pop() 3474 3475 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3476 self._match(TokenType.ALIAS) 3477 offset = self._parse_id_var( 3478 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3479 ) or exp.to_identifier("offset") 3480 3481 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3482 3483 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3484 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3485 if not is_derived and not self._match_text_seq("VALUES"): 3486 return None 3487 3488 expressions = self._parse_csv(self._parse_value) 3489 alias = self._parse_table_alias() 3490 3491 if is_derived: 3492 self._match_r_paren() 3493 3494 return self.expression( 3495 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3496 ) 3497 3498 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3499 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3500 as_modifier and self._match_text_seq("USING", "SAMPLE") 3501 ): 3502 return None 3503 3504 bucket_numerator = None 3505 bucket_denominator = None 3506 bucket_field = None 3507 percent = None 3508 size = None 3509 seed = None 3510 3511 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3512 matched_l_paren = self._match(TokenType.L_PAREN) 3513 3514 if self.TABLESAMPLE_CSV: 3515 num = None 3516 expressions = self._parse_csv(self._parse_primary) 3517 else: 3518 expressions = None 3519 num = ( 3520 self._parse_factor() 3521 if self._match(TokenType.NUMBER, advance=False) 3522 else 
self._parse_primary() or self._parse_placeholder() 3523 ) 3524 3525 if self._match_text_seq("BUCKET"): 3526 bucket_numerator = self._parse_number() 3527 self._match_text_seq("OUT", "OF") 3528 bucket_denominator = bucket_denominator = self._parse_number() 3529 self._match(TokenType.ON) 3530 bucket_field = self._parse_field() 3531 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3532 percent = num 3533 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3534 size = num 3535 else: 3536 percent = num 3537 3538 if matched_l_paren: 3539 self._match_r_paren() 3540 3541 if self._match(TokenType.L_PAREN): 3542 method = self._parse_var(upper=True) 3543 seed = self._match(TokenType.COMMA) and self._parse_number() 3544 self._match_r_paren() 3545 elif self._match_texts(("SEED", "REPEATABLE")): 3546 seed = self._parse_wrapped(self._parse_number) 3547 3548 if not method and self.DEFAULT_SAMPLING_METHOD: 3549 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3550 3551 return self.expression( 3552 exp.TableSample, 3553 expressions=expressions, 3554 method=method, 3555 bucket_numerator=bucket_numerator, 3556 bucket_denominator=bucket_denominator, 3557 bucket_field=bucket_field, 3558 percent=percent, 3559 size=size, 3560 seed=seed, 3561 ) 3562 3563 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3564 return list(iter(self._parse_pivot, None)) or None 3565 3566 def _parse_joins(self) -> t.Iterator[exp.Join]: 3567 return iter(self._parse_join, None) 3568 3569 # https://duckdb.org/docs/sql/statements/pivot 3570 def _parse_simplified_pivot(self) -> exp.Pivot: 3571 def _parse_on() -> t.Optional[exp.Expression]: 3572 this = self._parse_bitwise() 3573 return self._parse_in(this) if self._match(TokenType.IN) else this 3574 3575 this = self._parse_table() 3576 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3577 using = self._match(TokenType.USING) and self._parse_csv( 3578 lambda: self._parse_alias(self._parse_function()) 
3579 ) 3580 group = self._parse_group() 3581 return self.expression( 3582 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3583 ) 3584 3585 def _parse_pivot_in(self) -> exp.In: 3586 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3587 this = self._parse_assignment() 3588 3589 self._match(TokenType.ALIAS) 3590 alias = self._parse_field() 3591 if alias: 3592 return self.expression(exp.PivotAlias, this=this, alias=alias) 3593 3594 return this 3595 3596 value = self._parse_column() 3597 3598 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3599 self.raise_error("Expecting IN (") 3600 3601 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3602 3603 self._match_r_paren() 3604 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3605 3606 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3607 index = self._index 3608 include_nulls = None 3609 3610 if self._match(TokenType.PIVOT): 3611 unpivot = False 3612 elif self._match(TokenType.UNPIVOT): 3613 unpivot = True 3614 3615 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3616 if self._match_text_seq("INCLUDE", "NULLS"): 3617 include_nulls = True 3618 elif self._match_text_seq("EXCLUDE", "NULLS"): 3619 include_nulls = False 3620 else: 3621 return None 3622 3623 expressions = [] 3624 3625 if not self._match(TokenType.L_PAREN): 3626 self._retreat(index) 3627 return None 3628 3629 if unpivot: 3630 expressions = self._parse_csv(self._parse_column) 3631 else: 3632 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3633 3634 if not expressions: 3635 self.raise_error("Failed to parse PIVOT's aggregation list") 3636 3637 if not self._match(TokenType.FOR): 3638 self.raise_error("Expecting FOR") 3639 3640 field = self._parse_pivot_in() 3641 3642 self._match_r_paren() 3643 3644 pivot = self.expression( 3645 exp.Pivot, 3646 expressions=expressions, 3647 field=field, 3648 
unpivot=unpivot, 3649 include_nulls=include_nulls, 3650 ) 3651 3652 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3653 pivot.set("alias", self._parse_table_alias()) 3654 3655 if not unpivot: 3656 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3657 3658 columns: t.List[exp.Expression] = [] 3659 for fld in pivot.args["field"].expressions: 3660 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3661 for name in names: 3662 if self.PREFIXED_PIVOT_COLUMNS: 3663 name = f"{name}_{field_name}" if name else field_name 3664 else: 3665 name = f"{field_name}_{name}" if name else field_name 3666 3667 columns.append(exp.to_identifier(name)) 3668 3669 pivot.set("columns", columns) 3670 3671 return pivot 3672 3673 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3674 return [agg.alias for agg in aggregations] 3675 3676 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3677 if not skip_where_token and not self._match(TokenType.PREWHERE): 3678 return None 3679 3680 return self.expression( 3681 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3682 ) 3683 3684 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3685 if not skip_where_token and not self._match(TokenType.WHERE): 3686 return None 3687 3688 return self.expression( 3689 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3690 ) 3691 3692 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3693 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3694 return None 3695 3696 elements: t.Dict[str, t.Any] = defaultdict(list) 3697 3698 if self._match(TokenType.ALL): 3699 elements["all"] = True 3700 elif self._match(TokenType.DISTINCT): 3701 elements["all"] = False 3702 3703 while True: 3704 expressions = self._parse_csv( 3705 lambda: None 3706 if 
self._match(TokenType.ROLLUP, advance=False) 3707 else self._parse_assignment() 3708 ) 3709 if expressions: 3710 elements["expressions"].extend(expressions) 3711 3712 grouping_sets = self._parse_grouping_sets() 3713 if grouping_sets: 3714 elements["grouping_sets"].extend(grouping_sets) 3715 3716 rollup = None 3717 cube = None 3718 totals = None 3719 3720 index = self._index 3721 with_ = self._match(TokenType.WITH) 3722 if self._match(TokenType.ROLLUP): 3723 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3724 elements["rollup"].extend(ensure_list(rollup)) 3725 3726 if self._match(TokenType.CUBE): 3727 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3728 elements["cube"].extend(ensure_list(cube)) 3729 3730 if self._match_text_seq("TOTALS"): 3731 totals = True 3732 elements["totals"] = True # type: ignore 3733 3734 if not (grouping_sets or rollup or cube or totals): 3735 if with_: 3736 self._retreat(index) 3737 break 3738 3739 return self.expression(exp.Group, **elements) # type: ignore 3740 3741 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3742 if not self._match(TokenType.GROUPING_SETS): 3743 return None 3744 3745 return self._parse_wrapped_csv(self._parse_grouping_set) 3746 3747 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3748 if self._match(TokenType.L_PAREN): 3749 grouping_set = self._parse_csv(self._parse_column) 3750 self._match_r_paren() 3751 return self.expression(exp.Tuple, expressions=grouping_set) 3752 3753 return self._parse_column() 3754 3755 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3756 if not skip_having_token and not self._match(TokenType.HAVING): 3757 return None 3758 return self.expression(exp.Having, this=self._parse_assignment()) 3759 3760 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3761 if not self._match(TokenType.QUALIFY): 3762 return None 3763 return self.expression(exp.Qualify, this=self._parse_assignment()) 3764 3765 def 
_parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3766 if skip_start_token: 3767 start = None 3768 elif self._match(TokenType.START_WITH): 3769 start = self._parse_assignment() 3770 else: 3771 return None 3772 3773 self._match(TokenType.CONNECT_BY) 3774 nocycle = self._match_text_seq("NOCYCLE") 3775 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3776 exp.Prior, this=self._parse_bitwise() 3777 ) 3778 connect = self._parse_assignment() 3779 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3780 3781 if not start and self._match(TokenType.START_WITH): 3782 start = self._parse_assignment() 3783 3784 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3785 3786 def _parse_name_as_expression(self) -> exp.Alias: 3787 return self.expression( 3788 exp.Alias, 3789 alias=self._parse_id_var(any_token=True), 3790 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3791 ) 3792 3793 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3794 if self._match_text_seq("INTERPOLATE"): 3795 return self._parse_wrapped_csv(self._parse_name_as_expression) 3796 return None 3797 3798 def _parse_order( 3799 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3800 ) -> t.Optional[exp.Expression]: 3801 siblings = None 3802 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3803 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3804 return this 3805 3806 siblings = True 3807 3808 return self.expression( 3809 exp.Order, 3810 this=this, 3811 expressions=self._parse_csv(self._parse_ordered), 3812 interpolate=self._parse_interpolate(), 3813 siblings=siblings, 3814 ) 3815 3816 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3817 if not self._match(token): 3818 return None 3819 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3820 3821 def _parse_ordered( 3822 self, parse_method: 
t.Optional[t.Callable] = None 3823 ) -> t.Optional[exp.Ordered]: 3824 this = parse_method() if parse_method else self._parse_assignment() 3825 if not this: 3826 return None 3827 3828 asc = self._match(TokenType.ASC) 3829 desc = self._match(TokenType.DESC) or (asc and False) 3830 3831 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3832 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3833 3834 nulls_first = is_nulls_first or False 3835 explicitly_null_ordered = is_nulls_first or is_nulls_last 3836 3837 if ( 3838 not explicitly_null_ordered 3839 and ( 3840 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3841 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3842 ) 3843 and self.dialect.NULL_ORDERING != "nulls_are_last" 3844 ): 3845 nulls_first = True 3846 3847 if self._match_text_seq("WITH", "FILL"): 3848 with_fill = self.expression( 3849 exp.WithFill, 3850 **{ # type: ignore 3851 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3852 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3853 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3854 }, 3855 ) 3856 else: 3857 with_fill = None 3858 3859 return self.expression( 3860 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3861 ) 3862 3863 def _parse_limit( 3864 self, 3865 this: t.Optional[exp.Expression] = None, 3866 top: bool = False, 3867 skip_limit_token: bool = False, 3868 ) -> t.Optional[exp.Expression]: 3869 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3870 comments = self._prev_comments 3871 if top: 3872 limit_paren = self._match(TokenType.L_PAREN) 3873 expression = self._parse_term() if limit_paren else self._parse_number() 3874 3875 if limit_paren: 3876 self._match_r_paren() 3877 else: 3878 expression = self._parse_term() 3879 3880 if self._match(TokenType.COMMA): 3881 offset = expression 3882 expression = self._parse_term() 3883 else: 3884 offset = None 3885 3886 
limit_exp = self.expression( 3887 exp.Limit, 3888 this=this, 3889 expression=expression, 3890 offset=offset, 3891 comments=comments, 3892 expressions=self._parse_limit_by(), 3893 ) 3894 3895 return limit_exp 3896 3897 if self._match(TokenType.FETCH): 3898 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3899 direction = self._prev.text.upper() if direction else "FIRST" 3900 3901 count = self._parse_field(tokens=self.FETCH_TOKENS) 3902 percent = self._match(TokenType.PERCENT) 3903 3904 self._match_set((TokenType.ROW, TokenType.ROWS)) 3905 3906 only = self._match_text_seq("ONLY") 3907 with_ties = self._match_text_seq("WITH", "TIES") 3908 3909 if only and with_ties: 3910 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3911 3912 return self.expression( 3913 exp.Fetch, 3914 direction=direction, 3915 count=count, 3916 percent=percent, 3917 with_ties=with_ties, 3918 ) 3919 3920 return this 3921 3922 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3923 if not self._match(TokenType.OFFSET): 3924 return this 3925 3926 count = self._parse_term() 3927 self._match_set((TokenType.ROW, TokenType.ROWS)) 3928 3929 return self.expression( 3930 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3931 ) 3932 3933 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3934 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3935 3936 def _parse_locks(self) -> t.List[exp.Lock]: 3937 locks = [] 3938 while True: 3939 if self._match_text_seq("FOR", "UPDATE"): 3940 update = True 3941 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3942 "LOCK", "IN", "SHARE", "MODE" 3943 ): 3944 update = False 3945 else: 3946 break 3947 3948 expressions = None 3949 if self._match_text_seq("OF"): 3950 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3951 3952 wait: t.Optional[bool | exp.Expression] = None 3953 if 
self._match_text_seq("NOWAIT"): 3954 wait = True 3955 elif self._match_text_seq("WAIT"): 3956 wait = self._parse_primary() 3957 elif self._match_text_seq("SKIP", "LOCKED"): 3958 wait = False 3959 3960 locks.append( 3961 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3962 ) 3963 3964 return locks 3965 3966 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3967 while this and self._match_set(self.SET_OPERATIONS): 3968 token_type = self._prev.token_type 3969 3970 if token_type == TokenType.UNION: 3971 operation: t.Type[exp.SetOperation] = exp.Union 3972 elif token_type == TokenType.EXCEPT: 3973 operation = exp.Except 3974 else: 3975 operation = exp.Intersect 3976 3977 comments = self._prev.comments 3978 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3979 by_name = self._match_text_seq("BY", "NAME") 3980 expression = self._parse_select(nested=True, parse_set_operation=False) 3981 3982 this = self.expression( 3983 operation, 3984 comments=comments, 3985 this=this, 3986 distinct=distinct, 3987 by_name=by_name, 3988 expression=expression, 3989 ) 3990 3991 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 3992 expression = this.expression 3993 3994 if expression: 3995 for arg in self.SET_OP_MODIFIERS: 3996 expr = expression.args.get(arg) 3997 if expr: 3998 this.set(arg, expr.pop()) 3999 4000 return this 4001 4002 def _parse_expression(self) -> t.Optional[exp.Expression]: 4003 return self._parse_alias(self._parse_assignment()) 4004 4005 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4006 this = self._parse_disjunction() 4007 4008 while self._match_set(self.ASSIGNMENT): 4009 this = self.expression( 4010 self.ASSIGNMENT[self._prev.token_type], 4011 this=this, 4012 comments=self._prev_comments, 4013 expression=self._parse_assignment(), 4014 ) 4015 4016 return this 4017 4018 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 
4019 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4020 4021 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4022 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4023 4024 def _parse_equality(self) -> t.Optional[exp.Expression]: 4025 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4026 4027 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4028 return self._parse_tokens(self._parse_range, self.COMPARISON) 4029 4030 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4031 this = this or self._parse_bitwise() 4032 negate = self._match(TokenType.NOT) 4033 4034 if self._match_set(self.RANGE_PARSERS): 4035 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4036 if not expression: 4037 return this 4038 4039 this = expression 4040 elif self._match(TokenType.ISNULL): 4041 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4042 4043 # Postgres supports ISNULL and NOTNULL for conditions. 
4044 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4045 if self._match(TokenType.NOTNULL): 4046 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4047 this = self.expression(exp.Not, this=this) 4048 4049 if negate: 4050 this = self.expression(exp.Not, this=this) 4051 4052 if self._match(TokenType.IS): 4053 this = self._parse_is(this) 4054 4055 return this 4056 4057 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4058 index = self._index - 1 4059 negate = self._match(TokenType.NOT) 4060 4061 if self._match_text_seq("DISTINCT", "FROM"): 4062 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4063 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4064 4065 expression = self._parse_null() or self._parse_boolean() 4066 if not expression: 4067 self._retreat(index) 4068 return None 4069 4070 this = self.expression(exp.Is, this=this, expression=expression) 4071 return self.expression(exp.Not, this=this) if negate else this 4072 4073 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4074 unnest = self._parse_unnest(with_alias=False) 4075 if unnest: 4076 this = self.expression(exp.In, this=this, unnest=unnest) 4077 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4078 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4079 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4080 4081 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4082 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4083 else: 4084 this = self.expression(exp.In, this=this, expressions=expressions) 4085 4086 if matched_l_paren: 4087 self._match_r_paren(this) 4088 elif not self._match(TokenType.R_BRACKET, expression=this): 4089 self.raise_error("Expecting ]") 4090 else: 4091 this = self.expression(exp.In, this=this, field=self._parse_field()) 4092 4093 return this 
4094 4095 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4096 low = self._parse_bitwise() 4097 self._match(TokenType.AND) 4098 high = self._parse_bitwise() 4099 return self.expression(exp.Between, this=this, low=low, high=high) 4100 4101 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4102 if not self._match(TokenType.ESCAPE): 4103 return this 4104 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4105 4106 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4107 index = self._index 4108 4109 if not self._match(TokenType.INTERVAL) and match_interval: 4110 return None 4111 4112 if self._match(TokenType.STRING, advance=False): 4113 this = self._parse_primary() 4114 else: 4115 this = self._parse_term() 4116 4117 if not this or ( 4118 isinstance(this, exp.Column) 4119 and not this.table 4120 and not this.this.quoted 4121 and this.name.upper() == "IS" 4122 ): 4123 self._retreat(index) 4124 return None 4125 4126 unit = self._parse_function() or ( 4127 not self._match(TokenType.ALIAS, advance=False) 4128 and self._parse_var(any_token=True, upper=True) 4129 ) 4130 4131 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4132 # each INTERVAL expression into this canonical form so it's easy to transpile 4133 if this and this.is_number: 4134 this = exp.Literal.string(this.name) 4135 elif this and this.is_string: 4136 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4137 if len(parts) == 1: 4138 if unit: 4139 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4140 self._retreat(self._index - 1) 4141 4142 this = exp.Literal.string(parts[0][0]) 4143 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4144 4145 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4146 unit = self.expression( 4147 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of bitwise-level binary operators.

        Besides the operators in self.BITWISE, this also handles the dialect-dependent
        `||` string concat, the `??` coalesce operator, and `<<` / `>>` shifts (which
        arrive from the tokenizer as separate LT/GT token pairs).
        """
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                # `a ?? b` — null-coalescing operator
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this
is exp.IntDiv and self._prev.text.isalpha(): 4209 self._retreat(self._index - 1) 4210 return this 4211 4212 this = self.expression(klass, this=this, comments=comments, expression=expression) 4213 4214 if isinstance(this, exp.Div): 4215 this.args["typed"] = self.dialect.TYPED_DIVISION 4216 this.args["safe"] = self.dialect.SAFE_DIVISION 4217 4218 return this 4219 4220 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4221 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4222 4223 def _parse_unary(self) -> t.Optional[exp.Expression]: 4224 if self._match_set(self.UNARY_PARSERS): 4225 return self.UNARY_PARSERS[self._prev.token_type](self) 4226 return self._parse_at_time_zone(self._parse_type()) 4227 4228 def _parse_type( 4229 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4230 ) -> t.Optional[exp.Expression]: 4231 interval = parse_interval and self._parse_interval() 4232 if interval: 4233 return interval 4234 4235 index = self._index 4236 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4237 4238 if data_type: 4239 index2 = self._index 4240 this = self._parse_primary() 4241 4242 if isinstance(this, exp.Literal): 4243 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4244 if parser: 4245 return parser(self, this, data_type) 4246 4247 return self.expression(exp.Cast, this=this, to=data_type) 4248 4249 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4250 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4251 # 4252 # If the index difference here is greater than 1, that means the parser itself must have 4253 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4254 # 4255 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4256 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4257 # callable in the TYPE_CONVERTERS mapping. 
For example, Snowflake converts DECIMAL to 4258 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 4259 # 4260 # In these cases, we don't really want to return the converted type, but instead retreat 4261 # and try to parse a Column or Identifier in the section below. 4262 if data_type.expressions and index2 - index > 1: 4263 self._retreat(index2) 4264 return self._parse_column_ops(data_type) 4265 4266 self._retreat(index) 4267 4268 if fallback_to_identifier: 4269 return self._parse_id_var() 4270 4271 this = self._parse_column() 4272 return this and self._parse_column_ops(this) 4273 4274 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4275 this = self._parse_type() 4276 if not this: 4277 return None 4278 4279 if isinstance(this, exp.Column) and not this.table: 4280 this = exp.var(this.name.upper()) 4281 4282 return self.expression( 4283 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4284 ) 4285 4286 def _parse_types( 4287 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4288 ) -> t.Optional[exp.Expression]: 4289 index = self._index 4290 4291 this: t.Optional[exp.Expression] = None 4292 prefix = self._match_text_seq("SYSUDTLIB", ".") 4293 4294 if not self._match_set(self.TYPE_TOKENS): 4295 identifier = allow_identifiers and self._parse_id_var( 4296 any_token=False, tokens=(TokenType.VAR,) 4297 ) 4298 if isinstance(identifier, exp.Identifier): 4299 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4300 4301 if len(tokens) != 1: 4302 self.raise_error("Unexpected identifier", self._prev) 4303 4304 if tokens[0].token_type in self.TYPE_TOKENS: 4305 self._prev = tokens[0] 4306 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4307 type_name = identifier.name 4308 4309 while self._match(TokenType.DOT): 4310 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4311 4312 this = exp.DataType.build(type_name, udt=True) 4313 else: 4314 
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type expression, returning None (after retreating) on failure.

        Handles built-in type tokens, user-defined types, parameterized and nested
        types (STRUCT/MAP/arrays), timestamp/interval variants, and dialect-specific
        suffixes such as UNSIGNED, LIST and Postgres' ARRAY[n].

        Args:
            check_func: when True, reject a parenthesized form that is followed by a
                string literal (it is then likely a function call, not a type).
            schema: when True, allow sized array values (schema context).
            allow_identifiers: when True, try re-tokenizing an identifier as a type.
        """
        index = self._index

        this: t.Optional[exp.Expression] = None
        # Teradata-style SYSUDTLIB. prefix before the type name
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                # Re-tokenize the identifier; it may be a type name in disguise
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    # Collect a dotted UDT name, e.g. schema.my_type
                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        # Parenthesized parameters / nested types, e.g. DECIMAL(38, 0), ARRAY(INT)
        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. ClickHouse AggregateFunction(func, type, ...): first arg is the
                # aggregate function or its name, the rest are types
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # A parenthesized form might actually be a function call; checked below
            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        # Angle-bracketed nested types, e.g. ARRAY<INT>, STRUCT<a INT>
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values following the type, e.g. ARRAY<INT>[1, 2]
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # Parenthesized type followed by a string literal: treat as function call
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )
        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            matched_l_bracket = self._match(TokenType.L_BRACKET)
            if not matched_l_bracket and not matched_array:
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if values and not schema:
                # Sized arrays are only valid in a schema context; rewind otherwise
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this
    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one field of a STRUCT type definition, e.g. `a INT` in STRUCT<a INT>.

        Args:
            type_required: when True and no type was parsed for the field, retreat
                and re-parse the whole span as a bare type instead.
        """
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        # Optional `name: type` separator (e.g. ClickHouse/DuckDB style struct fields)
        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing column operators onto `this`: brackets, `::` casts, dots and
        other entries of self.COLUMN_OPERATORS, folding left-associatively."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast — the right-hand side must be a type
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # `a.b.c` — shift the qualifiers one level: column becomes table, etc.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this
isinstance(this, exp.Subquery): 4678 this = self._parse_subquery( 4679 this=self._parse_set_operations(this), parse_alias=False 4680 ) 4681 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4682 this = self.expression(exp.Tuple, expressions=expressions) 4683 else: 4684 this = self.expression(exp.Paren, this=this) 4685 4686 if this: 4687 this.add_comments(comments) 4688 4689 self._match_r_paren(expression=this) 4690 return this 4691 4692 return None 4693 4694 def _parse_field( 4695 self, 4696 any_token: bool = False, 4697 tokens: t.Optional[t.Collection[TokenType]] = None, 4698 anonymous_func: bool = False, 4699 ) -> t.Optional[exp.Expression]: 4700 if anonymous_func: 4701 field = ( 4702 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4703 or self._parse_primary() 4704 ) 4705 else: 4706 field = self._parse_primary() or self._parse_function( 4707 anonymous=anonymous_func, any_token=any_token 4708 ) 4709 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4710 4711 def _parse_function( 4712 self, 4713 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4714 anonymous: bool = False, 4715 optional_parens: bool = True, 4716 any_token: bool = False, 4717 ) -> t.Optional[exp.Expression]: 4718 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4719 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4720 fn_syntax = False 4721 if ( 4722 self._match(TokenType.L_BRACE, advance=False) 4723 and self._next 4724 and self._next.text.upper() == "FN" 4725 ): 4726 self._advance(2) 4727 fn_syntax = True 4728 4729 func = self._parse_function_call( 4730 functions=functions, 4731 anonymous=anonymous, 4732 optional_parens=optional_parens, 4733 any_token=any_token, 4734 ) 4735 4736 if fn_syntax: 4737 self._match(TokenType.R_BRACE) 4738 4739 return func 4740 4741 def _parse_function_call( 4742 self, 4743 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4744 anonymous: 
    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call at the current token, or return None.

        Dispatch order: no-paren function parsers, no-paren known functions, custom
        FUNCTION_PARSERS, subquery predicates, registered functions from `functions`
        (default self.FUNCTIONS), and finally an exp.Anonymous fallback.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        # From here on, a parenthesized argument list is required
        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening parenthesis
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                # Some builder callables accept a `dialect` kwarg; detect via co_varnames
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Remember the original spelling of the function name
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)
TokenType.IDENTIFIER: 4811 this = exp.Identifier(this=this, quoted=True) 4812 this = self.expression(exp.Anonymous, this=this, expressions=args) 4813 4814 if isinstance(this, exp.Expression): 4815 this.add_comments(comments) 4816 4817 self._match_r_paren(this) 4818 return self._parse_window(this) 4819 4820 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4821 transformed = [] 4822 4823 for e in expressions: 4824 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4825 if isinstance(e, exp.Alias): 4826 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4827 4828 if not isinstance(e, exp.PropertyEQ): 4829 e = self.expression( 4830 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4831 ) 4832 4833 if isinstance(e.this, exp.Column): 4834 e.this.replace(e.this.this) 4835 4836 transformed.append(e) 4837 4838 return transformed 4839 4840 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4841 return self._parse_column_def(self._parse_id_var()) 4842 4843 def _parse_user_defined_function( 4844 self, kind: t.Optional[TokenType] = None 4845 ) -> t.Optional[exp.Expression]: 4846 this = self._parse_id_var() 4847 4848 while self._match(TokenType.DOT): 4849 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4850 4851 if not self._match(TokenType.L_PAREN): 4852 return this 4853 4854 expressions = self._parse_csv(self._parse_function_parameter) 4855 self._match_r_paren() 4856 return self.expression( 4857 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4858 ) 4859 4860 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4861 literal = self._parse_primary() 4862 if literal: 4863 return self.expression(exp.Introducer, this=token.text, expression=literal) 4864 4865 return self.expression(exp.Identifier, this=token.text) 4866 4867 def _parse_session_parameter(self) -> exp.SessionParameter: 4868 kind = None 
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression, e.g. `(x, y) -> x + y`, if one follows.

        When no lambda operator is found, retreat and instead parse either a
        DISTINCT argument list or a select/expression, with any trailing
        RESPECT/IGNORE NULLS, HAVING MAX, ORDER BY and LIMIT modifiers.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                # Unbalanced parens: this wasn't a lambda argument list
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda operator: rewind and fall back to a regular argument
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )
    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of a column definition (type, computed expression, and
        constraints) for the already-parsed column name `this`.

        Returns `this` unchanged when neither a type nor any constraint follows.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        # Computed columns: `col AS <expr>` (only when no type preceded),
        # or ALIAS/MATERIALIZED <expr> (e.g. ClickHouse style)
        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_assignment(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            # `col type AS (<transform>)`
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        # Collect any remaining column constraints (NOT NULL, DEFAULT, ...)
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
exp.AutoIncrementColumnConstraint: 4972 start = None 4973 increment = None 4974 4975 if self._match(TokenType.L_PAREN, advance=False): 4976 args = self._parse_wrapped_csv(self._parse_bitwise) 4977 start = seq_get(args, 0) 4978 increment = seq_get(args, 1) 4979 elif self._match_text_seq("START"): 4980 start = self._parse_bitwise() 4981 self._match_text_seq("INCREMENT") 4982 increment = self._parse_bitwise() 4983 4984 if start and increment: 4985 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4986 4987 return exp.AutoIncrementColumnConstraint() 4988 4989 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4990 if not self._match_text_seq("REFRESH"): 4991 self._retreat(self._index - 1) 4992 return None 4993 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4994 4995 def _parse_compress(self) -> exp.CompressColumnConstraint: 4996 if self._match(TokenType.L_PAREN, advance=False): 4997 return self.expression( 4998 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4999 ) 5000 5001 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5002 5003 def _parse_generated_as_identity( 5004 self, 5005 ) -> ( 5006 exp.GeneratedAsIdentityColumnConstraint 5007 | exp.ComputedColumnConstraint 5008 | exp.GeneratedAsRowColumnConstraint 5009 ): 5010 if self._match_text_seq("BY", "DEFAULT"): 5011 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5012 this = self.expression( 5013 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5014 ) 5015 else: 5016 self._match_text_seq("ALWAYS") 5017 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5018 5019 self._match(TokenType.ALIAS) 5020 5021 if self._match_text_seq("ROW"): 5022 start = self._match_text_seq("START") 5023 if not start: 5024 self._match(TokenType.END) 5025 hidden = self._match_text_seq("HIDDEN") 5026 return 
self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5027 5028 identity = self._match_text_seq("IDENTITY") 5029 5030 if self._match(TokenType.L_PAREN): 5031 if self._match(TokenType.START_WITH): 5032 this.set("start", self._parse_bitwise()) 5033 if self._match_text_seq("INCREMENT", "BY"): 5034 this.set("increment", self._parse_bitwise()) 5035 if self._match_text_seq("MINVALUE"): 5036 this.set("minvalue", self._parse_bitwise()) 5037 if self._match_text_seq("MAXVALUE"): 5038 this.set("maxvalue", self._parse_bitwise()) 5039 5040 if self._match_text_seq("CYCLE"): 5041 this.set("cycle", True) 5042 elif self._match_text_seq("NO", "CYCLE"): 5043 this.set("cycle", False) 5044 5045 if not identity: 5046 this.set("expression", self._parse_range()) 5047 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5048 args = self._parse_csv(self._parse_bitwise) 5049 this.set("start", seq_get(args, 0)) 5050 this.set("increment", seq_get(args, 1)) 5051 5052 self._match_r_paren() 5053 5054 return this 5055 5056 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5057 self._match_text_seq("LENGTH") 5058 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5059 5060 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5061 if self._match_text_seq("NULL"): 5062 return self.expression(exp.NotNullColumnConstraint) 5063 if self._match_text_seq("CASESPECIFIC"): 5064 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5065 if self._match_text_seq("FOR", "REPLICATION"): 5066 return self.expression(exp.NotForReplicationColumnConstraint) 5067 return None 5068 5069 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5070 if self._match(TokenType.CONSTRAINT): 5071 this = self._parse_id_var() 5072 else: 5073 this = None 5074 5075 if self._match_texts(self.CONSTRAINT_PARSERS): 5076 return self.expression( 5077 exp.ColumnConstraint, 5078 this=this, 5079 
kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5080 ) 5081 5082 return this 5083 5084 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5085 if not self._match(TokenType.CONSTRAINT): 5086 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5087 5088 return self.expression( 5089 exp.Constraint, 5090 this=self._parse_id_var(), 5091 expressions=self._parse_unnamed_constraints(), 5092 ) 5093 5094 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5095 constraints = [] 5096 while True: 5097 constraint = self._parse_unnamed_constraint() or self._parse_function() 5098 if not constraint: 5099 break 5100 constraints.append(constraint) 5101 5102 return constraints 5103 5104 def _parse_unnamed_constraint( 5105 self, constraints: t.Optional[t.Collection[str]] = None 5106 ) -> t.Optional[exp.Expression]: 5107 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5108 constraints or self.CONSTRAINT_PARSERS 5109 ): 5110 return None 5111 5112 constraint = self._prev.text.upper() 5113 if constraint not in self.CONSTRAINT_PARSERS: 5114 self.raise_error(f"No parser found for schema constraint {constraint}.") 5115 5116 return self.CONSTRAINT_PARSERS[constraint](self) 5117 5118 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5119 self._match_text_seq("KEY") 5120 return self.expression( 5121 exp.UniqueColumnConstraint, 5122 this=self._parse_schema(self._parse_id_var(any_token=False)), 5123 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5124 on_conflict=self._parse_on_conflict(), 5125 ) 5126 5127 def _parse_key_constraint_options(self) -> t.List[str]: 5128 options = [] 5129 while True: 5130 if not self._curr: 5131 break 5132 5133 if self._match(TokenType.ON): 5134 action = None 5135 on = self._advance_any() and self._prev.text 5136 5137 if self._match_text_seq("NO", "ACTION"): 5138 action = "NO ACTION" 5139 elif self._match_text_seq("CASCADE"): 5140 
    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause: the target table plus key-constraint options.

        Args:
            match: when True, require and consume the REFERENCES token first;
                when False, assume the caller has already consumed it.
        """
        if match and not self._match(TokenType.REFERENCES):
            return None

        # NOTE(review): `expressions` is never populated here, so the produced
        # exp.Reference always carries expressions=None — presumably the
        # referenced column list is attached elsewhere; confirm with callers.
        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
reference=reference, 5201 **options, # type: ignore 5202 ) 5203 5204 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5205 return self._parse_field() 5206 5207 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5208 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5209 self._retreat(self._index - 1) 5210 return None 5211 5212 id_vars = self._parse_wrapped_id_vars() 5213 return self.expression( 5214 exp.PeriodForSystemTimeConstraint, 5215 this=seq_get(id_vars, 0), 5216 expression=seq_get(id_vars, 1), 5217 ) 5218 5219 def _parse_primary_key( 5220 self, wrapped_optional: bool = False, in_props: bool = False 5221 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5222 desc = ( 5223 self._match_set((TokenType.ASC, TokenType.DESC)) 5224 and self._prev.token_type == TokenType.DESC 5225 ) 5226 5227 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5228 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5229 5230 expressions = self._parse_wrapped_csv( 5231 self._parse_primary_key_part, optional=wrapped_optional 5232 ) 5233 options = self._parse_key_constraint_options() 5234 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5235 5236 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5237 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5238 5239 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5240 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5241 return this 5242 5243 bracket_kind = self._prev.token_type 5244 expressions = self._parse_csv( 5245 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5246 ) 5247 5248 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5249 self.raise_error("Expected ]") 5250 elif bracket_kind == TokenType.L_BRACE and not 
self._match(TokenType.R_BRACE): 5251 self.raise_error("Expected }") 5252 5253 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5254 if bracket_kind == TokenType.L_BRACE: 5255 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5256 elif not this: 5257 this = self.expression(exp.Array, expressions=expressions) 5258 else: 5259 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5260 if constructor_type: 5261 return self.expression(constructor_type, expressions=expressions) 5262 5263 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5264 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5265 5266 self._add_comments(this) 5267 return self._parse_bracket(this) 5268 5269 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5270 if self._match(TokenType.COLON): 5271 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5272 return this 5273 5274 def _parse_case(self) -> t.Optional[exp.Expression]: 5275 ifs = [] 5276 default = None 5277 5278 comments = self._prev_comments 5279 expression = self._parse_assignment() 5280 5281 while self._match(TokenType.WHEN): 5282 this = self._parse_assignment() 5283 self._match(TokenType.THEN) 5284 then = self._parse_assignment() 5285 ifs.append(self.expression(exp.If, this=this, true=then)) 5286 5287 if self._match(TokenType.ELSE): 5288 default = self._parse_assignment() 5289 5290 if not self._match(TokenType.END): 5291 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5292 default = exp.column("interval") 5293 else: 5294 self.raise_error("Expected END after CASE", self._prev) 5295 5296 return self.expression( 5297 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5298 ) 5299 5300 def _parse_if(self) -> t.Optional[exp.Expression]: 5301 if self._match(TokenType.L_PAREN): 5302 args = 
self._parse_csv(self._parse_assignment) 5303 this = self.validate_expression(exp.If.from_arg_list(args), args) 5304 self._match_r_paren() 5305 else: 5306 index = self._index - 1 5307 5308 if self.NO_PAREN_IF_COMMANDS and index == 0: 5309 return self._parse_as_command(self._prev) 5310 5311 condition = self._parse_assignment() 5312 5313 if not condition: 5314 self._retreat(index) 5315 return None 5316 5317 self._match(TokenType.THEN) 5318 true = self._parse_assignment() 5319 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5320 self._match(TokenType.END) 5321 this = self.expression(exp.If, this=condition, true=true, false=false) 5322 5323 return this 5324 5325 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5326 if not self._match_text_seq("VALUE", "FOR"): 5327 self._retreat(self._index - 1) 5328 return None 5329 5330 return self.expression( 5331 exp.NextValueFor, 5332 this=self._parse_column(), 5333 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5334 ) 5335 5336 def _parse_extract(self) -> exp.Extract: 5337 this = self._parse_function() or self._parse_var() or self._parse_type() 5338 5339 if self._match(TokenType.FROM): 5340 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5341 5342 if not self._match(TokenType.COMMA): 5343 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5344 5345 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5346 5347 def _parse_gap_fill(self) -> exp.GapFill: 5348 self._match(TokenType.TABLE) 5349 this = self._parse_table() 5350 5351 self._match(TokenType.COMMA) 5352 args = [this, *self._parse_csv(self._parse_lambda)] 5353 5354 gap_fill = exp.GapFill.from_arg_list(args) 5355 return self.validate_expression(gap_fill, args) 5356 5357 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5358 this = self._parse_assignment() 5359 5360 if not 
self._match(TokenType.ALIAS): 5361 if self._match(TokenType.COMMA): 5362 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5363 5364 self.raise_error("Expected AS after CAST") 5365 5366 fmt = None 5367 to = self._parse_types() 5368 5369 if self._match(TokenType.FORMAT): 5370 fmt_string = self._parse_string() 5371 fmt = self._parse_at_time_zone(fmt_string) 5372 5373 if not to: 5374 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5375 if not safe and to.this in exp.DataType.TEMPORAL_TYPES: 5376 this = self.expression( 5377 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5378 this=this, 5379 format=exp.Literal.string( 5380 format_time( 5381 fmt_string.this if fmt_string else "", 5382 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5383 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5384 ) 5385 ), 5386 ) 5387 5388 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5389 this.set("zone", fmt.args["zone"]) 5390 return this 5391 elif not to: 5392 self.raise_error("Expected TYPE after CAST") 5393 elif isinstance(to, exp.Identifier): 5394 to = exp.DataType.build(to.name, udt=True) 5395 elif to.this == exp.DataType.Type.CHAR: 5396 if self._match(TokenType.CHARACTER_SET): 5397 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5398 5399 return self.expression( 5400 exp.Cast if strict else exp.TryCast, 5401 this=this, 5402 to=to, 5403 format=fmt, 5404 safe=safe, 5405 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5406 ) 5407 5408 def _parse_string_agg(self) -> exp.Expression: 5409 if self._match(TokenType.DISTINCT): 5410 args: t.List[t.Optional[exp.Expression]] = [ 5411 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5412 ] 5413 if self._match(TokenType.COMMA): 5414 args.extend(self._parse_csv(self._parse_assignment)) 5415 else: 5416 args = self._parse_csv(self._parse_assignment) # type: ignore 5417 5418 index = 
self._index 5419 if not self._match(TokenType.R_PAREN) and args: 5420 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5421 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5422 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5423 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5424 5425 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5426 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5427 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5428 if not self._match_text_seq("WITHIN", "GROUP"): 5429 self._retreat(index) 5430 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5431 5432 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5433 order = self._parse_order(this=seq_get(args, 0)) 5434 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5435 5436 def _parse_convert( 5437 self, strict: bool, safe: t.Optional[bool] = None 5438 ) -> t.Optional[exp.Expression]: 5439 this = self._parse_bitwise() 5440 5441 if self._match(TokenType.USING): 5442 to: t.Optional[exp.Expression] = self.expression( 5443 exp.CharacterSet, this=self._parse_var() 5444 ) 5445 elif self._match(TokenType.COMMA): 5446 to = self._parse_types() 5447 else: 5448 to = None 5449 5450 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5451 5452 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5453 """ 5454 There are generally two variants of the DECODE function: 5455 5456 - DECODE(bin, charset) 5457 - DECODE(expression, search, result [, search, result] ... 
[, default]) 5458 5459 The second variant will always be parsed into a CASE expression. Note that NULL 5460 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5461 instead of relying on pattern matching. 5462 """ 5463 args = self._parse_csv(self._parse_assignment) 5464 5465 if len(args) < 3: 5466 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5467 5468 expression, *expressions = args 5469 if not expression: 5470 return None 5471 5472 ifs = [] 5473 for search, result in zip(expressions[::2], expressions[1::2]): 5474 if not search or not result: 5475 return None 5476 5477 if isinstance(search, exp.Literal): 5478 ifs.append( 5479 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5480 ) 5481 elif isinstance(search, exp.Null): 5482 ifs.append( 5483 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5484 ) 5485 else: 5486 cond = exp.or_( 5487 exp.EQ(this=expression.copy(), expression=search), 5488 exp.and_( 5489 exp.Is(this=expression.copy(), expression=exp.Null()), 5490 exp.Is(this=search.copy(), expression=exp.Null()), 5491 copy=False, 5492 ), 5493 copy=False, 5494 ) 5495 ifs.append(exp.If(this=cond, true=result)) 5496 5497 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5498 5499 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5500 self._match_text_seq("KEY") 5501 key = self._parse_column() 5502 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5503 self._match_text_seq("VALUE") 5504 value = self._parse_bitwise() 5505 5506 if not key and not value: 5507 return None 5508 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5509 5510 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5511 if not this or not self._match_text_seq("FORMAT", "JSON"): 5512 return this 5513 5514 return self.expression(exp.FormatJson, this=this) 5515 
5516 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5517 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5518 for value in values: 5519 if self._match_text_seq(value, "ON", on): 5520 return f"{value} ON {on}" 5521 5522 return None 5523 5524 @t.overload 5525 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5526 5527 @t.overload 5528 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 5529 5530 def _parse_json_object(self, agg=False): 5531 star = self._parse_star() 5532 expressions = ( 5533 [star] 5534 if star 5535 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5536 ) 5537 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5538 5539 unique_keys = None 5540 if self._match_text_seq("WITH", "UNIQUE"): 5541 unique_keys = True 5542 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5543 unique_keys = False 5544 5545 self._match_text_seq("KEYS") 5546 5547 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5548 self._parse_type() 5549 ) 5550 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5551 5552 return self.expression( 5553 exp.JSONObjectAgg if agg else exp.JSONObject, 5554 expressions=expressions, 5555 null_handling=null_handling, 5556 unique_keys=unique_keys, 5557 return_type=return_type, 5558 encoding=encoding, 5559 ) 5560 5561 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5562 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5563 if not self._match_text_seq("NESTED"): 5564 this = self._parse_id_var() 5565 kind = self._parse_types(allow_identifiers=False) 5566 nested = None 5567 else: 5568 this = None 5569 kind = None 5570 nested = True 5571 5572 path = self._match_text_seq("PATH") and self._parse_string() 5573 nested_schema = nested and self._parse_json_schema() 5574 5575 return self.expression( 5576 exp.JSONColumnDef, 5577 this=this, 5578 kind=kind, 
5579 path=path, 5580 nested_schema=nested_schema, 5581 ) 5582 5583 def _parse_json_schema(self) -> exp.JSONSchema: 5584 self._match_text_seq("COLUMNS") 5585 return self.expression( 5586 exp.JSONSchema, 5587 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5588 ) 5589 5590 def _parse_json_table(self) -> exp.JSONTable: 5591 this = self._parse_format_json(self._parse_bitwise()) 5592 path = self._match(TokenType.COMMA) and self._parse_string() 5593 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5594 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5595 schema = self._parse_json_schema() 5596 5597 return exp.JSONTable( 5598 this=this, 5599 schema=schema, 5600 path=path, 5601 error_handling=error_handling, 5602 empty_handling=empty_handling, 5603 ) 5604 5605 def _parse_match_against(self) -> exp.MatchAgainst: 5606 expressions = self._parse_csv(self._parse_column) 5607 5608 self._match_text_seq(")", "AGAINST", "(") 5609 5610 this = self._parse_string() 5611 5612 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5613 modifier = "IN NATURAL LANGUAGE MODE" 5614 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5615 modifier = f"{modifier} WITH QUERY EXPANSION" 5616 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5617 modifier = "IN BOOLEAN MODE" 5618 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5619 modifier = "WITH QUERY EXPANSION" 5620 else: 5621 modifier = None 5622 5623 return self.expression( 5624 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5625 ) 5626 5627 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5628 def _parse_open_json(self) -> exp.OpenJSON: 5629 this = self._parse_bitwise() 5630 path = self._match(TokenType.COMMA) and self._parse_string() 5631 5632 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5633 this = self._parse_field(any_token=True) 5634 kind = 
self._parse_types() 5635 path = self._parse_string() 5636 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5637 5638 return self.expression( 5639 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5640 ) 5641 5642 expressions = None 5643 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5644 self._match_l_paren() 5645 expressions = self._parse_csv(_parse_open_json_column_def) 5646 5647 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5648 5649 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5650 args = self._parse_csv(self._parse_bitwise) 5651 5652 if self._match(TokenType.IN): 5653 return self.expression( 5654 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5655 ) 5656 5657 if haystack_first: 5658 haystack = seq_get(args, 0) 5659 needle = seq_get(args, 1) 5660 else: 5661 needle = seq_get(args, 0) 5662 haystack = seq_get(args, 1) 5663 5664 return self.expression( 5665 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5666 ) 5667 5668 def _parse_predict(self) -> exp.Predict: 5669 self._match_text_seq("MODEL") 5670 this = self._parse_table() 5671 5672 self._match(TokenType.COMMA) 5673 self._match_text_seq("TABLE") 5674 5675 return self.expression( 5676 exp.Predict, 5677 this=this, 5678 expression=self._parse_table(), 5679 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5680 ) 5681 5682 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5683 args = self._parse_csv(self._parse_table) 5684 return exp.JoinHint(this=func_name.upper(), expressions=args) 5685 5686 def _parse_substring(self) -> exp.Substring: 5687 # Postgres supports the form: substring(string [from int] [for int]) 5688 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5689 5690 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5691 5692 if self._match(TokenType.FROM): 5693 
args.append(self._parse_bitwise()) 5694 if self._match(TokenType.FOR): 5695 if len(args) == 1: 5696 args.append(exp.Literal.number(1)) 5697 args.append(self._parse_bitwise()) 5698 5699 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5700 5701 def _parse_trim(self) -> exp.Trim: 5702 # https://www.w3resource.com/sql/character-functions/trim.php 5703 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5704 5705 position = None 5706 collation = None 5707 expression = None 5708 5709 if self._match_texts(self.TRIM_TYPES): 5710 position = self._prev.text.upper() 5711 5712 this = self._parse_bitwise() 5713 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5714 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5715 expression = self._parse_bitwise() 5716 5717 if invert_order: 5718 this, expression = expression, this 5719 5720 if self._match(TokenType.COLLATE): 5721 collation = self._parse_bitwise() 5722 5723 return self.expression( 5724 exp.Trim, this=this, position=position, expression=expression, collation=collation 5725 ) 5726 5727 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5728 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5729 5730 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5731 return self._parse_window(self._parse_id_var(), alias=True) 5732 5733 def _parse_respect_or_ignore_nulls( 5734 self, this: t.Optional[exp.Expression] 5735 ) -> t.Optional[exp.Expression]: 5736 if self._match_text_seq("IGNORE", "NULLS"): 5737 return self.expression(exp.IgnoreNulls, this=this) 5738 if self._match_text_seq("RESPECT", "NULLS"): 5739 return self.expression(exp.RespectNulls, this=this) 5740 return this 5741 5742 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5743 if self._match(TokenType.HAVING): 5744 self._match_texts(("MAX", "MIN")) 5745 max = self._prev.text.upper() != "MIN" 5746 
return self.expression( 5747 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5748 ) 5749 5750 return this 5751 5752 def _parse_window( 5753 self, this: t.Optional[exp.Expression], alias: bool = False 5754 ) -> t.Optional[exp.Expression]: 5755 func = this 5756 comments = func.comments if isinstance(func, exp.Expression) else None 5757 5758 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5759 self._match(TokenType.WHERE) 5760 this = self.expression( 5761 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5762 ) 5763 self._match_r_paren() 5764 5765 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5766 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5767 if self._match_text_seq("WITHIN", "GROUP"): 5768 order = self._parse_wrapped(self._parse_order) 5769 this = self.expression(exp.WithinGroup, this=this, expression=order) 5770 5771 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5772 # Some dialects choose to implement and some do not. 5773 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5774 5775 # There is some code above in _parse_lambda that handles 5776 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5777 5778 # The below changes handle 5779 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
5780 5781 # Oracle allows both formats 5782 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5783 # and Snowflake chose to do the same for familiarity 5784 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5785 if isinstance(this, exp.AggFunc): 5786 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5787 5788 if ignore_respect and ignore_respect is not this: 5789 ignore_respect.replace(ignore_respect.this) 5790 this = self.expression(ignore_respect.__class__, this=this) 5791 5792 this = self._parse_respect_or_ignore_nulls(this) 5793 5794 # bigquery select from window x AS (partition by ...) 5795 if alias: 5796 over = None 5797 self._match(TokenType.ALIAS) 5798 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5799 return this 5800 else: 5801 over = self._prev.text.upper() 5802 5803 if comments and isinstance(func, exp.Expression): 5804 func.pop_comments() 5805 5806 if not self._match(TokenType.L_PAREN): 5807 return self.expression( 5808 exp.Window, 5809 comments=comments, 5810 this=this, 5811 alias=self._parse_id_var(False), 5812 over=over, 5813 ) 5814 5815 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5816 5817 first = self._match(TokenType.FIRST) 5818 if self._match_text_seq("LAST"): 5819 first = False 5820 5821 partition, order = self._parse_partition_and_order() 5822 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5823 5824 if kind: 5825 self._match(TokenType.BETWEEN) 5826 start = self._parse_window_spec() 5827 self._match(TokenType.AND) 5828 end = self._parse_window_spec() 5829 5830 spec = self.expression( 5831 exp.WindowSpec, 5832 kind=kind, 5833 start=start["value"], 5834 start_side=start["side"], 5835 end=end["value"], 5836 end_side=end["side"], 5837 ) 5838 else: 5839 spec = None 5840 5841 self._match_r_paren() 5842 5843 window = self.expression( 5844 exp.Window, 5845 comments=comments, 
5846 this=this, 5847 partition_by=partition, 5848 order=order, 5849 spec=spec, 5850 alias=window_alias, 5851 over=over, 5852 first=first, 5853 ) 5854 5855 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5856 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5857 return self._parse_window(window, alias=alias) 5858 5859 return window 5860 5861 def _parse_partition_and_order( 5862 self, 5863 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5864 return self._parse_partition_by(), self._parse_order() 5865 5866 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5867 self._match(TokenType.BETWEEN) 5868 5869 return { 5870 "value": ( 5871 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5872 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5873 or self._parse_bitwise() 5874 ), 5875 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5876 } 5877 5878 def _parse_alias( 5879 self, this: t.Optional[exp.Expression], explicit: bool = False 5880 ) -> t.Optional[exp.Expression]: 5881 any_token = self._match(TokenType.ALIAS) 5882 comments = self._prev_comments or [] 5883 5884 if explicit and not any_token: 5885 return this 5886 5887 if self._match(TokenType.L_PAREN): 5888 aliases = self.expression( 5889 exp.Aliases, 5890 comments=comments, 5891 this=this, 5892 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5893 ) 5894 self._match_r_paren(aliases) 5895 return aliases 5896 5897 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5898 self.STRING_ALIASES and self._parse_string_as_identifier() 5899 ) 5900 5901 if alias: 5902 comments.extend(alias.pop_comments()) 5903 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5904 column = this.this 5905 5906 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5907 if not this.comments and column and column.comments: 5908 this.comments = 
column.pop_comments() 5909 5910 return this 5911 5912 def _parse_id_var( 5913 self, 5914 any_token: bool = True, 5915 tokens: t.Optional[t.Collection[TokenType]] = None, 5916 ) -> t.Optional[exp.Expression]: 5917 expression = self._parse_identifier() 5918 if not expression and ( 5919 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5920 ): 5921 quoted = self._prev.token_type == TokenType.STRING 5922 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5923 5924 return expression 5925 5926 def _parse_string(self) -> t.Optional[exp.Expression]: 5927 if self._match_set(self.STRING_PARSERS): 5928 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5929 return self._parse_placeholder() 5930 5931 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5932 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5933 5934 def _parse_number(self) -> t.Optional[exp.Expression]: 5935 if self._match_set(self.NUMERIC_PARSERS): 5936 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5937 return self._parse_placeholder() 5938 5939 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5940 if self._match(TokenType.IDENTIFIER): 5941 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5942 return self._parse_placeholder() 5943 5944 def _parse_var( 5945 self, 5946 any_token: bool = False, 5947 tokens: t.Optional[t.Collection[TokenType]] = None, 5948 upper: bool = False, 5949 ) -> t.Optional[exp.Expression]: 5950 if ( 5951 (any_token and self._advance_any()) 5952 or self._match(TokenType.VAR) 5953 or (self._match_set(tokens) if tokens else False) 5954 ): 5955 return self.expression( 5956 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5957 ) 5958 return self._parse_placeholder() 5959 5960 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5961 if self._curr and 
(ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5962 self._advance() 5963 return self._prev 5964 return None 5965 5966 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5967 return self._parse_string() or self._parse_var(any_token=True) 5968 5969 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5970 return self._parse_primary() or self._parse_var(any_token=True) 5971 5972 def _parse_null(self) -> t.Optional[exp.Expression]: 5973 if self._match_set(self.NULL_TOKENS): 5974 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5975 return self._parse_placeholder() 5976 5977 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5978 if self._match(TokenType.TRUE): 5979 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5980 if self._match(TokenType.FALSE): 5981 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5982 return self._parse_placeholder() 5983 5984 def _parse_star(self) -> t.Optional[exp.Expression]: 5985 if self._match(TokenType.STAR): 5986 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5987 return self._parse_placeholder() 5988 5989 def _parse_parameter(self) -> exp.Parameter: 5990 this = self._parse_identifier() or self._parse_primary_or_var() 5991 return self.expression(exp.Parameter, this=this) 5992 5993 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5994 if self._match_set(self.PLACEHOLDER_PARSERS): 5995 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5996 if placeholder: 5997 return placeholder 5998 self._advance(-1) 5999 return None 6000 6001 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6002 if not self._match_texts(keywords): 6003 return None 6004 if self._match(TokenType.L_PAREN, advance=False): 6005 return self._parse_wrapped_csv(self._parse_expression) 6006 6007 expression = self._parse_expression() 6008 return [expression] if expression else None 6009 6010 def _parse_csv( 6011 self, 
parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6012 ) -> t.List[exp.Expression]: 6013 parse_result = parse_method() 6014 items = [parse_result] if parse_result is not None else [] 6015 6016 while self._match(sep): 6017 self._add_comments(parse_result) 6018 parse_result = parse_method() 6019 if parse_result is not None: 6020 items.append(parse_result) 6021 6022 return items 6023 6024 def _parse_tokens( 6025 self, parse_method: t.Callable, expressions: t.Dict 6026 ) -> t.Optional[exp.Expression]: 6027 this = parse_method() 6028 6029 while self._match_set(expressions): 6030 this = self.expression( 6031 expressions[self._prev.token_type], 6032 this=this, 6033 comments=self._prev_comments, 6034 expression=parse_method(), 6035 ) 6036 6037 return this 6038 6039 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6040 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6041 6042 def _parse_wrapped_csv( 6043 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6044 ) -> t.List[exp.Expression]: 6045 return self._parse_wrapped( 6046 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6047 ) 6048 6049 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6050 wrapped = self._match(TokenType.L_PAREN) 6051 if not wrapped and not optional: 6052 self.raise_error("Expecting (") 6053 parse_result = parse_method() 6054 if wrapped: 6055 self._match_r_paren() 6056 return parse_result 6057 6058 def _parse_expressions(self) -> t.List[exp.Expression]: 6059 return self._parse_csv(self._parse_expression) 6060 6061 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6062 return self._parse_select() or self._parse_set_operations( 6063 self._parse_expression() if alias else self._parse_assignment() 6064 ) 6065 6066 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6067 return self._parse_query_modifiers( 
6068 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6069 ) 6070 6071 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6072 this = None 6073 if self._match_texts(self.TRANSACTION_KIND): 6074 this = self._prev.text 6075 6076 self._match_texts(("TRANSACTION", "WORK")) 6077 6078 modes = [] 6079 while True: 6080 mode = [] 6081 while self._match(TokenType.VAR): 6082 mode.append(self._prev.text) 6083 6084 if mode: 6085 modes.append(" ".join(mode)) 6086 if not self._match(TokenType.COMMA): 6087 break 6088 6089 return self.expression(exp.Transaction, this=this, modes=modes) 6090 6091 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6092 chain = None 6093 savepoint = None 6094 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6095 6096 self._match_texts(("TRANSACTION", "WORK")) 6097 6098 if self._match_text_seq("TO"): 6099 self._match_text_seq("SAVEPOINT") 6100 savepoint = self._parse_id_var() 6101 6102 if self._match(TokenType.AND): 6103 chain = not self._match_text_seq("NO") 6104 self._match_text_seq("CHAIN") 6105 6106 if is_rollback: 6107 return self.expression(exp.Rollback, savepoint=savepoint) 6108 6109 return self.expression(exp.Commit, chain=chain) 6110 6111 def _parse_refresh(self) -> exp.Refresh: 6112 self._match(TokenType.TABLE) 6113 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6114 6115 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6116 if not self._match_text_seq("ADD"): 6117 return None 6118 6119 self._match(TokenType.COLUMN) 6120 exists_column = self._parse_exists(not_=True) 6121 expression = self._parse_field_def() 6122 6123 if expression: 6124 expression.set("exists", exists_column) 6125 6126 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6127 if self._match_texts(("FIRST", "AFTER")): 6128 position = self._prev.text 6129 column_position = self.expression( 6130 exp.ColumnPosition, 
this=self._parse_column(), position=position 6131 ) 6132 expression.set("position", column_position) 6133 6134 return expression 6135 6136 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6137 drop = self._match(TokenType.DROP) and self._parse_drop() 6138 if drop and not isinstance(drop, exp.Command): 6139 drop.set("kind", drop.args.get("kind", "COLUMN")) 6140 return drop 6141 6142 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6143 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6144 return self.expression( 6145 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6146 ) 6147 6148 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6149 index = self._index - 1 6150 6151 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6152 return self._parse_csv( 6153 lambda: self.expression( 6154 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6155 ) 6156 ) 6157 6158 self._retreat(index) 6159 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6160 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6161 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6162 6163 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6164 if self._match_texts(self.ALTER_ALTER_PARSERS): 6165 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6166 6167 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6168 # keyword after ALTER we default to parsing this statement 6169 self._match(TokenType.COLUMN) 6170 column = self._parse_field(any_token=True) 6171 6172 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6173 return self.expression(exp.AlterColumn, this=column, drop=True) 6174 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6175 return self.expression(exp.AlterColumn, this=column, 
default=self._parse_assignment()) 6176 if self._match(TokenType.COMMENT): 6177 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6178 if self._match_text_seq("DROP", "NOT", "NULL"): 6179 return self.expression( 6180 exp.AlterColumn, 6181 this=column, 6182 drop=True, 6183 allow_null=True, 6184 ) 6185 if self._match_text_seq("SET", "NOT", "NULL"): 6186 return self.expression( 6187 exp.AlterColumn, 6188 this=column, 6189 allow_null=False, 6190 ) 6191 self._match_text_seq("SET", "DATA") 6192 self._match_text_seq("TYPE") 6193 return self.expression( 6194 exp.AlterColumn, 6195 this=column, 6196 dtype=self._parse_types(), 6197 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6198 using=self._match(TokenType.USING) and self._parse_assignment(), 6199 ) 6200 6201 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6202 if self._match_texts(("ALL", "EVEN", "AUTO")): 6203 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6204 6205 self._match_text_seq("KEY", "DISTKEY") 6206 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6207 6208 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6209 if compound: 6210 self._match_text_seq("SORTKEY") 6211 6212 if self._match(TokenType.L_PAREN, advance=False): 6213 return self.expression( 6214 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6215 ) 6216 6217 self._match_texts(("AUTO", "NONE")) 6218 return self.expression( 6219 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6220 ) 6221 6222 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6223 index = self._index - 1 6224 6225 partition_exists = self._parse_exists() 6226 if self._match(TokenType.PARTITION, advance=False): 6227 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6228 6229 self._retreat(index) 6230 return 
self._parse_csv(self._parse_drop_column) 6231 6232 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6233 if self._match(TokenType.COLUMN): 6234 exists = self._parse_exists() 6235 old_column = self._parse_column() 6236 to = self._match_text_seq("TO") 6237 new_column = self._parse_column() 6238 6239 if old_column is None or to is None or new_column is None: 6240 return None 6241 6242 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6243 6244 self._match_text_seq("TO") 6245 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6246 6247 def _parse_alter_table_set(self) -> exp.AlterSet: 6248 alter_set = self.expression(exp.AlterSet) 6249 6250 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6251 "TABLE", "PROPERTIES" 6252 ): 6253 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6254 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6255 alter_set.set("expressions", [self._parse_assignment()]) 6256 elif self._match_texts(("LOGGED", "UNLOGGED")): 6257 alter_set.set("option", exp.var(self._prev.text.upper())) 6258 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6259 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6260 elif self._match_text_seq("LOCATION"): 6261 alter_set.set("location", self._parse_field()) 6262 elif self._match_text_seq("ACCESS", "METHOD"): 6263 alter_set.set("access_method", self._parse_field()) 6264 elif self._match_text_seq("TABLESPACE"): 6265 alter_set.set("tablespace", self._parse_field()) 6266 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6267 alter_set.set("file_format", [self._parse_field()]) 6268 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6269 alter_set.set("file_format", self._parse_wrapped_options()) 6270 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6271 alter_set.set("copy_options", 
self._parse_wrapped_options()) 6272 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6273 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6274 else: 6275 if self._match_text_seq("SERDE"): 6276 alter_set.set("serde", self._parse_field()) 6277 6278 alter_set.set("expressions", [self._parse_properties()]) 6279 6280 return alter_set 6281 6282 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6283 start = self._prev 6284 6285 if not self._match(TokenType.TABLE): 6286 return self._parse_as_command(start) 6287 6288 exists = self._parse_exists() 6289 only = self._match_text_seq("ONLY") 6290 this = self._parse_table(schema=True) 6291 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6292 6293 if self._next: 6294 self._advance() 6295 6296 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6297 if parser: 6298 actions = ensure_list(parser(self)) 6299 options = self._parse_csv(self._parse_property) 6300 6301 if not self._curr and actions: 6302 return self.expression( 6303 exp.AlterTable, 6304 this=this, 6305 exists=exists, 6306 actions=actions, 6307 only=only, 6308 options=options, 6309 cluster=cluster, 6310 ) 6311 6312 return self._parse_as_command(start) 6313 6314 def _parse_merge(self) -> exp.Merge: 6315 self._match(TokenType.INTO) 6316 target = self._parse_table() 6317 6318 if target and self._match(TokenType.ALIAS, advance=False): 6319 target.set("alias", self._parse_table_alias()) 6320 6321 self._match(TokenType.USING) 6322 using = self._parse_table() 6323 6324 self._match(TokenType.ON) 6325 on = self._parse_assignment() 6326 6327 return self.expression( 6328 exp.Merge, 6329 this=target, 6330 using=using, 6331 on=on, 6332 expressions=self._parse_when_matched(), 6333 ) 6334 6335 def _parse_when_matched(self) -> t.List[exp.When]: 6336 whens = [] 6337 6338 while self._match(TokenType.WHEN): 6339 matched = not self._match(TokenType.NOT) 6340 self._match_text_seq("MATCHED") 6341 source 
= ( 6342 False 6343 if self._match_text_seq("BY", "TARGET") 6344 else self._match_text_seq("BY", "SOURCE") 6345 ) 6346 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6347 6348 self._match(TokenType.THEN) 6349 6350 if self._match(TokenType.INSERT): 6351 _this = self._parse_star() 6352 if _this: 6353 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6354 else: 6355 then = self.expression( 6356 exp.Insert, 6357 this=self._parse_value(), 6358 expression=self._match_text_seq("VALUES") and self._parse_value(), 6359 ) 6360 elif self._match(TokenType.UPDATE): 6361 expressions = self._parse_star() 6362 if expressions: 6363 then = self.expression(exp.Update, expressions=expressions) 6364 else: 6365 then = self.expression( 6366 exp.Update, 6367 expressions=self._match(TokenType.SET) 6368 and self._parse_csv(self._parse_equality), 6369 ) 6370 elif self._match(TokenType.DELETE): 6371 then = self.expression(exp.Var, this=self._prev.text) 6372 else: 6373 then = None 6374 6375 whens.append( 6376 self.expression( 6377 exp.When, 6378 matched=matched, 6379 source=source, 6380 condition=condition, 6381 then=then, 6382 ) 6383 ) 6384 return whens 6385 6386 def _parse_show(self) -> t.Optional[exp.Expression]: 6387 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6388 if parser: 6389 return parser(self) 6390 return self._parse_as_command(self._prev) 6391 6392 def _parse_set_item_assignment( 6393 self, kind: t.Optional[str] = None 6394 ) -> t.Optional[exp.Expression]: 6395 index = self._index 6396 6397 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6398 return self._parse_set_transaction(global_=kind == "GLOBAL") 6399 6400 left = self._parse_primary() or self._parse_column() 6401 assignment_delimiter = self._match_texts(("=", "TO")) 6402 6403 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6404 self._retreat(index) 6405 return None 6406 6407 right = 
self._parse_statement() or self._parse_id_var() 6408 if isinstance(right, (exp.Column, exp.Identifier)): 6409 right = exp.var(right.name) 6410 6411 this = self.expression(exp.EQ, this=left, expression=right) 6412 return self.expression(exp.SetItem, this=this, kind=kind) 6413 6414 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6415 self._match_text_seq("TRANSACTION") 6416 characteristics = self._parse_csv( 6417 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6418 ) 6419 return self.expression( 6420 exp.SetItem, 6421 expressions=characteristics, 6422 kind="TRANSACTION", 6423 **{"global": global_}, # type: ignore 6424 ) 6425 6426 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6427 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6428 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6429 6430 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6431 index = self._index 6432 set_ = self.expression( 6433 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6434 ) 6435 6436 if self._curr: 6437 self._retreat(index) 6438 return self._parse_as_command(self._prev) 6439 6440 return set_ 6441 6442 def _parse_var_from_options( 6443 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6444 ) -> t.Optional[exp.Var]: 6445 start = self._curr 6446 if not start: 6447 return None 6448 6449 option = start.text.upper() 6450 continuations = options.get(option) 6451 6452 index = self._index 6453 self._advance() 6454 for keywords in continuations or []: 6455 if isinstance(keywords, str): 6456 keywords = (keywords,) 6457 6458 if self._match_text_seq(*keywords): 6459 option = f"{option} {' '.join(keywords)}" 6460 break 6461 else: 6462 if continuations or continuations is None: 6463 if raise_unmatched: 6464 self.raise_error(f"Unknown option {option}") 6465 6466 self._retreat(index) 6467 return None 6468 6469 return 
exp.var(option) 6470 6471 def _parse_as_command(self, start: Token) -> exp.Command: 6472 while self._curr: 6473 self._advance() 6474 text = self._find_sql(start, self._prev) 6475 size = len(start.text) 6476 self._warn_unsupported() 6477 return exp.Command(this=text[:size], expression=text[size:]) 6478 6479 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6480 settings = [] 6481 6482 self._match_l_paren() 6483 kind = self._parse_id_var() 6484 6485 if self._match(TokenType.L_PAREN): 6486 while True: 6487 key = self._parse_id_var() 6488 value = self._parse_primary() 6489 6490 if not key and value is None: 6491 break 6492 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6493 self._match(TokenType.R_PAREN) 6494 6495 self._match_r_paren() 6496 6497 return self.expression( 6498 exp.DictProperty, 6499 this=this, 6500 kind=kind.this if kind else None, 6501 settings=settings, 6502 ) 6503 6504 def _parse_dict_range(self, this: str) -> exp.DictRange: 6505 self._match_l_paren() 6506 has_min = self._match_text_seq("MIN") 6507 if has_min: 6508 min = self._parse_var() or self._parse_primary() 6509 self._match_text_seq("MAX") 6510 max = self._parse_var() or self._parse_primary() 6511 else: 6512 max = self._parse_var() or self._parse_primary() 6513 min = exp.Literal.number(0) 6514 self._match_r_paren() 6515 return self.expression(exp.DictRange, this=this, min=min, max=max) 6516 6517 def _parse_comprehension( 6518 self, this: t.Optional[exp.Expression] 6519 ) -> t.Optional[exp.Comprehension]: 6520 index = self._index 6521 expression = self._parse_column() 6522 if not self._match(TokenType.IN): 6523 self._retreat(index - 1) 6524 return None 6525 iterator = self._parse_column() 6526 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6527 return self.expression( 6528 exp.Comprehension, 6529 this=this, 6530 expression=expression, 6531 iterator=iterator, 6532 condition=condition, 6533 ) 6534 6535 def _parse_heredoc(self) 
-> t.Optional[exp.Heredoc]: 6536 if self._match(TokenType.HEREDOC_STRING): 6537 return self.expression(exp.Heredoc, this=self._prev.text) 6538 6539 if not self._match_text_seq("$"): 6540 return None 6541 6542 tags = ["$"] 6543 tag_text = None 6544 6545 if self._is_connected(): 6546 self._advance() 6547 tags.append(self._prev.text.upper()) 6548 else: 6549 self.raise_error("No closing $ found") 6550 6551 if tags[-1] != "$": 6552 if self._is_connected() and self._match_text_seq("$"): 6553 tag_text = tags[-1] 6554 tags.append("$") 6555 else: 6556 self.raise_error("No closing $ found") 6557 6558 heredoc_start = self._curr 6559 6560 while self._curr: 6561 if self._match_text_seq(*tags, advance=False): 6562 this = self._find_sql(heredoc_start, self._prev) 6563 self._advance(len(tags)) 6564 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6565 6566 self._advance() 6567 6568 self.raise_error(f"No closing {''.join(tags)} found") 6569 return None 6570 6571 def _find_parser( 6572 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6573 ) -> t.Optional[t.Callable]: 6574 if not self._curr: 6575 return None 6576 6577 index = self._index 6578 this = [] 6579 while True: 6580 # The current token might be multiple words 6581 curr = self._curr.text.upper() 6582 key = curr.split(" ") 6583 this.append(curr) 6584 6585 self._advance() 6586 result, trie = in_trie(trie, key) 6587 if result == TrieResult.FAILED: 6588 break 6589 6590 if result == TrieResult.EXISTS: 6591 subparser = parsers[" ".join(this)] 6592 return subparser 6593 6594 self._retreat(index) 6595 return None 6596 6597 def _match(self, token_type, advance=True, expression=None): 6598 if not self._curr: 6599 return None 6600 6601 if self._curr.token_type == token_type: 6602 if advance: 6603 self._advance() 6604 self._add_comments(expression) 6605 return True 6606 6607 return None 6608 6609 def _match_set(self, types, advance=True): 6610 if not self._curr: 6611 return None 6612 6613 if self._curr.token_type in types: 
6614 if advance: 6615 self._advance() 6616 return True 6617 6618 return None 6619 6620 def _match_pair(self, token_type_a, token_type_b, advance=True): 6621 if not self._curr or not self._next: 6622 return None 6623 6624 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6625 if advance: 6626 self._advance(2) 6627 return True 6628 6629 return None 6630 6631 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6632 if not self._match(TokenType.L_PAREN, expression=expression): 6633 self.raise_error("Expecting (") 6634 6635 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6636 if not self._match(TokenType.R_PAREN, expression=expression): 6637 self.raise_error("Expecting )") 6638 6639 def _match_texts(self, texts, advance=True): 6640 if self._curr and self._curr.text.upper() in texts: 6641 if advance: 6642 self._advance() 6643 return True 6644 return None 6645 6646 def _match_text_seq(self, *texts, advance=True): 6647 index = self._index 6648 for text in texts: 6649 if self._curr and self._curr.text.upper() == text: 6650 self._advance() 6651 else: 6652 self._retreat(index) 6653 return None 6654 6655 if not advance: 6656 self._retreat(index) 6657 6658 return True 6659 6660 def _replace_lambda( 6661 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6662 ) -> t.Optional[exp.Expression]: 6663 if not node: 6664 return node 6665 6666 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6667 6668 for column in node.find_all(exp.Column): 6669 typ = lambda_types.get(column.parts[0].name) 6670 if typ is not None: 6671 dot_or_id = column.to_dot() if column.table else column.this 6672 6673 if typ: 6674 dot_or_id = self.expression( 6675 exp.Cast, 6676 this=dot_or_id, 6677 to=typ, 6678 ) 6679 6680 parent = column.parent 6681 6682 while isinstance(parent, exp.Dot): 6683 if not isinstance(parent.parent, exp.Dot): 6684 parent.replace(dot_or_id) 6685 
break 6686 parent = parent.parent 6687 else: 6688 if column is node: 6689 node = dot_or_id 6690 else: 6691 column.replace(dot_or_id) 6692 return node 6693 6694 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6695 start = self._prev 6696 6697 # Not to be confused with TRUNCATE(number, decimals) function call 6698 if self._match(TokenType.L_PAREN): 6699 self._retreat(self._index - 2) 6700 return self._parse_function() 6701 6702 # Clickhouse supports TRUNCATE DATABASE as well 6703 is_database = self._match(TokenType.DATABASE) 6704 6705 self._match(TokenType.TABLE) 6706 6707 exists = self._parse_exists(not_=False) 6708 6709 expressions = self._parse_csv( 6710 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6711 ) 6712 6713 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6714 6715 if self._match_text_seq("RESTART", "IDENTITY"): 6716 identity = "RESTART" 6717 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6718 identity = "CONTINUE" 6719 else: 6720 identity = None 6721 6722 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6723 option = self._prev.text 6724 else: 6725 option = None 6726 6727 partition = self._parse_partition() 6728 6729 # Fallback case 6730 if self._curr: 6731 return self._parse_as_command(start) 6732 6733 return self.expression( 6734 exp.TruncateTable, 6735 expressions=expressions, 6736 is_database=is_database, 6737 exists=exists, 6738 cluster=cluster, 6739 identity=identity, 6740 option=option, 6741 partition=partition, 6742 ) 6743 6744 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6745 this = self._parse_ordered(self._parse_opclass) 6746 6747 if not self._match(TokenType.WITH): 6748 return this 6749 6750 op = self._parse_var(any_token=True) 6751 6752 return self.expression(exp.WithOperator, this=this, op=op) 6753 6754 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6755 self._match(TokenType.EQ) 6756 
self._match(TokenType.L_PAREN) 6757 6758 opts: t.List[t.Optional[exp.Expression]] = [] 6759 while self._curr and not self._match(TokenType.R_PAREN): 6760 if self._match_text_seq("FORMAT_NAME", "="): 6761 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6762 # so we parse it separately to use _parse_field() 6763 prop = self.expression( 6764 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6765 ) 6766 opts.append(prop) 6767 else: 6768 opts.append(self._parse_property()) 6769 6770 self._match(TokenType.COMMA) 6771 6772 return opts 6773 6774 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6775 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6776 6777 options = [] 6778 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6779 option = self._parse_var(any_token=True) 6780 prev = self._prev.text.upper() 6781 6782 # Different dialects might separate options and values by white space, "=" and "AS" 6783 self._match(TokenType.EQ) 6784 self._match(TokenType.ALIAS) 6785 6786 param = self.expression(exp.CopyParameter, this=option) 6787 6788 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6789 TokenType.L_PAREN, advance=False 6790 ): 6791 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6792 param.set("expressions", self._parse_wrapped_options()) 6793 elif prev == "FILE_FORMAT": 6794 # T-SQL's external file format case 6795 param.set("expression", self._parse_field()) 6796 else: 6797 param.set("expression", self._parse_unquoted_field()) 6798 6799 options.append(param) 6800 self._match(sep) 6801 6802 return options 6803 6804 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6805 expr = self.expression(exp.Credentials) 6806 6807 if self._match_text_seq("STORAGE_INTEGRATION", "="): 6808 expr.set("storage", self._parse_field()) 6809 if self._match_text_seq("CREDENTIALS"): 6810 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 6811 
creds = ( 6812 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6813 ) 6814 expr.set("credentials", creds) 6815 if self._match_text_seq("ENCRYPTION"): 6816 expr.set("encryption", self._parse_wrapped_options()) 6817 if self._match_text_seq("IAM_ROLE"): 6818 expr.set("iam_role", self._parse_field()) 6819 if self._match_text_seq("REGION"): 6820 expr.set("region", self._parse_field()) 6821 6822 return expr 6823 6824 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6825 return self._parse_field() 6826 6827 def _parse_copy(self) -> exp.Copy | exp.Command: 6828 start = self._prev 6829 6830 self._match(TokenType.INTO) 6831 6832 this = ( 6833 self._parse_select(nested=True, parse_subquery_alias=False) 6834 if self._match(TokenType.L_PAREN, advance=False) 6835 else self._parse_table(schema=True) 6836 ) 6837 6838 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6839 6840 files = self._parse_csv(self._parse_file_location) 6841 credentials = self._parse_credentials() 6842 6843 self._match_text_seq("WITH") 6844 6845 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6846 6847 # Fallback case 6848 if self._curr: 6849 return self._parse_as_command(start) 6850 6851 return self.expression( 6852 exp.Copy, 6853 this=this, 6854 kind=kind, 6855 credentials=credentials, 6856 files=files, 6857 params=params, 6858 )
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from parsed arguments.

    A single star argument produces a ``StarMap``; otherwise the flat argument
    list is interpreted as alternating key/value pairs and packed into a
    ``VarMap`` of two parallel arrays.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys: t.List[exp.Expression] = []
    values: t.List[exp.Expression] = []

    # Consume the flat list two entries at a time: key, then its value.
    index = 0
    while index < len(args):
        keys.append(args[index])
        values.append(args[index + 1])
        index += 2

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Produce a parser method for a binary range operator (e.g. BETWEEN-like nodes).

    The returned callable parses the right-hand side with ``_parse_bitwise``,
    builds an ``expr_type`` node around both operands (optionally swapping them
    when ``reverse_args`` is set), and finally lets ``_parse_escape`` pick up a
    trailing ESCAPE clause.
    """

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        lhs = this
        rhs = self._parse_bitwise()

        if reverse_args:
            lhs, rhs = rhs, lhs

        node = self.expression(expr_type, this=lhs, expression=rhs)
        return self._parse_escape(node)

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a Log (or Ln) node from LOG(...) arguments, honoring dialect order."""
    # Default argument order is base first, then the expression
    base = seq_get(args, 0)
    value = seq_get(args, 1)

    if not value:
        # Single-argument LOG: some dialects define it as the natural logarithm
        func = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
        return func(this=base)

    if dialect.LOG_BASE_FIRST:
        return exp.Log(this=base, expression=value)

    # Dialect places the expression before the base, so swap the operands
    return exp.Log(this=value, expression=base)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder that constructs `expr_type` with a dialect-normalized JSON path."""

    def _builder(args: t.List, dialect: Dialect) -> E:
        json_path = dialect.to_json_path(seq_get(args, 1))
        node = expr_type(this=seq_get(args, 0), expression=json_path)

        # Only JSONExtract accepts additional (variadic) arguments
        if expr_type is exp.JSONExtract and len(args) > 2:
            node.set("expressions", args[2:])

        return node

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """Build a Mod node, parenthesizing binary operands to preserve precedence.

    e.g. MOD(a + 1, 7) must render as (a + 1) % 7, not a + 1 % 7.
    """

    def _maybe_wrap(node: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Binary operands need explicit parens when re-rendered with the % operator
        if isinstance(node, exp.Binary):
            return exp.Paren(this=node)
        return node

    return exp.Mod(this=_maybe_wrap(seq_get(args, 0)), expression=_maybe_wrap(seq_get(args, 1)))
122class Parser(metaclass=_Parser): 123 """ 124 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 125 126 Args: 127 error_level: The desired error level. 128 Default: ErrorLevel.IMMEDIATE 129 error_message_context: The amount of context to capture from a query string when displaying 130 the error message (in number of characters). 131 Default: 100 132 max_errors: Maximum number of error messages to include in a raised ParseError. 133 This is only relevant if error_level is ErrorLevel.RAISE. 134 Default: 3 135 """ 136 137 FUNCTIONS: t.Dict[str, t.Callable] = { 138 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 139 "CONCAT": lambda args, dialect: exp.Concat( 140 expressions=args, 141 safe=not dialect.STRICT_STRING_CONCAT, 142 coalesce=dialect.CONCAT_COALESCE, 143 ), 144 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 145 expressions=args, 146 safe=not dialect.STRICT_STRING_CONCAT, 147 coalesce=dialect.CONCAT_COALESCE, 148 ), 149 "DATE_TO_DATE_STR": lambda args: exp.Cast( 150 this=seq_get(args, 0), 151 to=exp.DataType(this=exp.DataType.Type.TEXT), 152 ), 153 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 154 "HEX": build_hex, 155 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 156 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 157 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 158 "LIKE": build_like, 159 "LOG": build_logarithm, 160 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 161 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 162 "LOWER": build_lower, 163 "MOD": build_mod, 164 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 165 if len(args) != 2 166 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 167 "TIME_TO_TIME_STR": lambda args: 
exp.Cast( 168 this=seq_get(args, 0), 169 to=exp.DataType(this=exp.DataType.Type.TEXT), 170 ), 171 "TO_HEX": build_hex, 172 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 173 this=exp.Cast( 174 this=seq_get(args, 0), 175 to=exp.DataType(this=exp.DataType.Type.TEXT), 176 ), 177 start=exp.Literal.number(1), 178 length=exp.Literal.number(10), 179 ), 180 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 181 "UPPER": build_upper, 182 "VAR_MAP": build_var_map, 183 } 184 185 NO_PAREN_FUNCTIONS = { 186 TokenType.CURRENT_DATE: exp.CurrentDate, 187 TokenType.CURRENT_DATETIME: exp.CurrentDate, 188 TokenType.CURRENT_TIME: exp.CurrentTime, 189 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 190 TokenType.CURRENT_USER: exp.CurrentUser, 191 } 192 193 STRUCT_TYPE_TOKENS = { 194 TokenType.NESTED, 195 TokenType.OBJECT, 196 TokenType.STRUCT, 197 } 198 199 NESTED_TYPE_TOKENS = { 200 TokenType.ARRAY, 201 TokenType.LIST, 202 TokenType.LOWCARDINALITY, 203 TokenType.MAP, 204 TokenType.NULLABLE, 205 *STRUCT_TYPE_TOKENS, 206 } 207 208 ENUM_TYPE_TOKENS = { 209 TokenType.ENUM, 210 TokenType.ENUM8, 211 TokenType.ENUM16, 212 } 213 214 AGGREGATE_TYPE_TOKENS = { 215 TokenType.AGGREGATEFUNCTION, 216 TokenType.SIMPLEAGGREGATEFUNCTION, 217 } 218 219 TYPE_TOKENS = { 220 TokenType.BIT, 221 TokenType.BOOLEAN, 222 TokenType.TINYINT, 223 TokenType.UTINYINT, 224 TokenType.SMALLINT, 225 TokenType.USMALLINT, 226 TokenType.INT, 227 TokenType.UINT, 228 TokenType.BIGINT, 229 TokenType.UBIGINT, 230 TokenType.INT128, 231 TokenType.UINT128, 232 TokenType.INT256, 233 TokenType.UINT256, 234 TokenType.MEDIUMINT, 235 TokenType.UMEDIUMINT, 236 TokenType.FIXEDSTRING, 237 TokenType.FLOAT, 238 TokenType.DOUBLE, 239 TokenType.CHAR, 240 TokenType.NCHAR, 241 TokenType.VARCHAR, 242 TokenType.NVARCHAR, 243 TokenType.BPCHAR, 244 TokenType.TEXT, 245 TokenType.MEDIUMTEXT, 246 TokenType.LONGTEXT, 247 TokenType.MEDIUMBLOB, 248 TokenType.LONGBLOB, 249 TokenType.BINARY, 250 TokenType.VARBINARY, 251 
TokenType.JSON, 252 TokenType.JSONB, 253 TokenType.INTERVAL, 254 TokenType.TINYBLOB, 255 TokenType.TINYTEXT, 256 TokenType.TIME, 257 TokenType.TIMETZ, 258 TokenType.TIMESTAMP, 259 TokenType.TIMESTAMP_S, 260 TokenType.TIMESTAMP_MS, 261 TokenType.TIMESTAMP_NS, 262 TokenType.TIMESTAMPTZ, 263 TokenType.TIMESTAMPLTZ, 264 TokenType.TIMESTAMPNTZ, 265 TokenType.DATETIME, 266 TokenType.DATETIME64, 267 TokenType.DATE, 268 TokenType.DATE32, 269 TokenType.INT4RANGE, 270 TokenType.INT4MULTIRANGE, 271 TokenType.INT8RANGE, 272 TokenType.INT8MULTIRANGE, 273 TokenType.NUMRANGE, 274 TokenType.NUMMULTIRANGE, 275 TokenType.TSRANGE, 276 TokenType.TSMULTIRANGE, 277 TokenType.TSTZRANGE, 278 TokenType.TSTZMULTIRANGE, 279 TokenType.DATERANGE, 280 TokenType.DATEMULTIRANGE, 281 TokenType.DECIMAL, 282 TokenType.UDECIMAL, 283 TokenType.BIGDECIMAL, 284 TokenType.UUID, 285 TokenType.GEOGRAPHY, 286 TokenType.GEOMETRY, 287 TokenType.HLLSKETCH, 288 TokenType.HSTORE, 289 TokenType.PSEUDO_TYPE, 290 TokenType.SUPER, 291 TokenType.SERIAL, 292 TokenType.SMALLSERIAL, 293 TokenType.BIGSERIAL, 294 TokenType.XML, 295 TokenType.YEAR, 296 TokenType.UNIQUEIDENTIFIER, 297 TokenType.USERDEFINED, 298 TokenType.MONEY, 299 TokenType.SMALLMONEY, 300 TokenType.ROWVERSION, 301 TokenType.IMAGE, 302 TokenType.VARIANT, 303 TokenType.OBJECT, 304 TokenType.OBJECT_IDENTIFIER, 305 TokenType.INET, 306 TokenType.IPADDRESS, 307 TokenType.IPPREFIX, 308 TokenType.IPV4, 309 TokenType.IPV6, 310 TokenType.UNKNOWN, 311 TokenType.NULL, 312 TokenType.NAME, 313 TokenType.TDIGEST, 314 *ENUM_TYPE_TOKENS, 315 *NESTED_TYPE_TOKENS, 316 *AGGREGATE_TYPE_TOKENS, 317 } 318 319 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 320 TokenType.BIGINT: TokenType.UBIGINT, 321 TokenType.INT: TokenType.UINT, 322 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 323 TokenType.SMALLINT: TokenType.USMALLINT, 324 TokenType.TINYINT: TokenType.UTINYINT, 325 TokenType.DECIMAL: TokenType.UDECIMAL, 326 } 327 328 SUBQUERY_PREDICATES = { 329 TokenType.ANY: exp.Any, 330 TokenType.ALL: 
exp.All, 331 TokenType.EXISTS: exp.Exists, 332 TokenType.SOME: exp.Any, 333 } 334 335 RESERVED_TOKENS = { 336 *Tokenizer.SINGLE_TOKENS.values(), 337 TokenType.SELECT, 338 } - {TokenType.IDENTIFIER} 339 340 DB_CREATABLES = { 341 TokenType.DATABASE, 342 TokenType.DICTIONARY, 343 TokenType.MODEL, 344 TokenType.SCHEMA, 345 TokenType.SEQUENCE, 346 TokenType.STORAGE_INTEGRATION, 347 TokenType.TABLE, 348 TokenType.TAG, 349 TokenType.VIEW, 350 TokenType.WAREHOUSE, 351 TokenType.STREAMLIT, 352 } 353 354 CREATABLES = { 355 TokenType.COLUMN, 356 TokenType.CONSTRAINT, 357 TokenType.FOREIGN_KEY, 358 TokenType.FUNCTION, 359 TokenType.INDEX, 360 TokenType.PROCEDURE, 361 *DB_CREATABLES, 362 } 363 364 # Tokens that can represent identifiers 365 ID_VAR_TOKENS = { 366 TokenType.VAR, 367 TokenType.ANTI, 368 TokenType.APPLY, 369 TokenType.ASC, 370 TokenType.ASOF, 371 TokenType.AUTO_INCREMENT, 372 TokenType.BEGIN, 373 TokenType.BPCHAR, 374 TokenType.CACHE, 375 TokenType.CASE, 376 TokenType.COLLATE, 377 TokenType.COMMAND, 378 TokenType.COMMENT, 379 TokenType.COMMIT, 380 TokenType.CONSTRAINT, 381 TokenType.COPY, 382 TokenType.DEFAULT, 383 TokenType.DELETE, 384 TokenType.DESC, 385 TokenType.DESCRIBE, 386 TokenType.DICTIONARY, 387 TokenType.DIV, 388 TokenType.END, 389 TokenType.EXECUTE, 390 TokenType.ESCAPE, 391 TokenType.FALSE, 392 TokenType.FIRST, 393 TokenType.FILTER, 394 TokenType.FINAL, 395 TokenType.FORMAT, 396 TokenType.FULL, 397 TokenType.IDENTIFIER, 398 TokenType.IS, 399 TokenType.ISNULL, 400 TokenType.INTERVAL, 401 TokenType.KEEP, 402 TokenType.KILL, 403 TokenType.LEFT, 404 TokenType.LOAD, 405 TokenType.MERGE, 406 TokenType.NATURAL, 407 TokenType.NEXT, 408 TokenType.OFFSET, 409 TokenType.OPERATOR, 410 TokenType.ORDINALITY, 411 TokenType.OVERLAPS, 412 TokenType.OVERWRITE, 413 TokenType.PARTITION, 414 TokenType.PERCENT, 415 TokenType.PIVOT, 416 TokenType.PRAGMA, 417 TokenType.RANGE, 418 TokenType.RECURSIVE, 419 TokenType.REFERENCES, 420 TokenType.REFRESH, 421 TokenType.REPLACE, 422 
TokenType.RIGHT, 423 TokenType.ROLLUP, 424 TokenType.ROW, 425 TokenType.ROWS, 426 TokenType.SEMI, 427 TokenType.SET, 428 TokenType.SETTINGS, 429 TokenType.SHOW, 430 TokenType.TEMPORARY, 431 TokenType.TOP, 432 TokenType.TRUE, 433 TokenType.TRUNCATE, 434 TokenType.UNIQUE, 435 TokenType.UNNEST, 436 TokenType.UNPIVOT, 437 TokenType.UPDATE, 438 TokenType.USE, 439 TokenType.VOLATILE, 440 TokenType.WINDOW, 441 *CREATABLES, 442 *SUBQUERY_PREDICATES, 443 *TYPE_TOKENS, 444 *NO_PAREN_FUNCTIONS, 445 } 446 447 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 448 449 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 450 TokenType.ANTI, 451 TokenType.APPLY, 452 TokenType.ASOF, 453 TokenType.FULL, 454 TokenType.LEFT, 455 TokenType.LOCK, 456 TokenType.NATURAL, 457 TokenType.OFFSET, 458 TokenType.RIGHT, 459 TokenType.SEMI, 460 TokenType.WINDOW, 461 } 462 463 ALIAS_TOKENS = ID_VAR_TOKENS 464 465 ARRAY_CONSTRUCTORS = { 466 "ARRAY": exp.Array, 467 "LIST": exp.List, 468 } 469 470 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 471 472 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 473 474 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 475 476 FUNC_TOKENS = { 477 TokenType.COLLATE, 478 TokenType.COMMAND, 479 TokenType.CURRENT_DATE, 480 TokenType.CURRENT_DATETIME, 481 TokenType.CURRENT_TIMESTAMP, 482 TokenType.CURRENT_TIME, 483 TokenType.CURRENT_USER, 484 TokenType.FILTER, 485 TokenType.FIRST, 486 TokenType.FORMAT, 487 TokenType.GLOB, 488 TokenType.IDENTIFIER, 489 TokenType.INDEX, 490 TokenType.ISNULL, 491 TokenType.ILIKE, 492 TokenType.INSERT, 493 TokenType.LIKE, 494 TokenType.MERGE, 495 TokenType.OFFSET, 496 TokenType.PRIMARY_KEY, 497 TokenType.RANGE, 498 TokenType.REPLACE, 499 TokenType.RLIKE, 500 TokenType.ROW, 501 TokenType.UNNEST, 502 TokenType.VAR, 503 TokenType.LEFT, 504 TokenType.RIGHT, 505 TokenType.SEQUENCE, 506 TokenType.DATE, 507 TokenType.DATETIME, 508 TokenType.TABLE, 509 TokenType.TIMESTAMP, 510 TokenType.TIMESTAMPTZ, 511 TokenType.TRUNCATE, 512 
TokenType.WINDOW, 513 TokenType.XOR, 514 *TYPE_TOKENS, 515 *SUBQUERY_PREDICATES, 516 } 517 518 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 519 TokenType.AND: exp.And, 520 } 521 522 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 523 TokenType.COLON_EQ: exp.PropertyEQ, 524 } 525 526 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 527 TokenType.OR: exp.Or, 528 } 529 530 EQUALITY = { 531 TokenType.EQ: exp.EQ, 532 TokenType.NEQ: exp.NEQ, 533 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 534 } 535 536 COMPARISON = { 537 TokenType.GT: exp.GT, 538 TokenType.GTE: exp.GTE, 539 TokenType.LT: exp.LT, 540 TokenType.LTE: exp.LTE, 541 } 542 543 BITWISE = { 544 TokenType.AMP: exp.BitwiseAnd, 545 TokenType.CARET: exp.BitwiseXor, 546 TokenType.PIPE: exp.BitwiseOr, 547 } 548 549 TERM = { 550 TokenType.DASH: exp.Sub, 551 TokenType.PLUS: exp.Add, 552 TokenType.MOD: exp.Mod, 553 TokenType.COLLATE: exp.Collate, 554 } 555 556 FACTOR = { 557 TokenType.DIV: exp.IntDiv, 558 TokenType.LR_ARROW: exp.Distance, 559 TokenType.SLASH: exp.Div, 560 TokenType.STAR: exp.Mul, 561 } 562 563 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 564 565 TIMES = { 566 TokenType.TIME, 567 TokenType.TIMETZ, 568 } 569 570 TIMESTAMPS = { 571 TokenType.TIMESTAMP, 572 TokenType.TIMESTAMPTZ, 573 TokenType.TIMESTAMPLTZ, 574 *TIMES, 575 } 576 577 SET_OPERATIONS = { 578 TokenType.UNION, 579 TokenType.INTERSECT, 580 TokenType.EXCEPT, 581 } 582 583 JOIN_METHODS = { 584 TokenType.ASOF, 585 TokenType.NATURAL, 586 TokenType.POSITIONAL, 587 } 588 589 JOIN_SIDES = { 590 TokenType.LEFT, 591 TokenType.RIGHT, 592 TokenType.FULL, 593 } 594 595 JOIN_KINDS = { 596 TokenType.ANTI, 597 TokenType.CROSS, 598 TokenType.INNER, 599 TokenType.OUTER, 600 TokenType.SEMI, 601 TokenType.STRAIGHT_JOIN, 602 } 603 604 JOIN_HINTS: t.Set[str] = set() 605 606 LAMBDAS = { 607 TokenType.ARROW: lambda self, expressions: self.expression( 608 exp.Lambda, 609 this=self._replace_lambda( 610 self._parse_assignment(), 611 
expressions, 612 ), 613 expressions=expressions, 614 ), 615 TokenType.FARROW: lambda self, expressions: self.expression( 616 exp.Kwarg, 617 this=exp.var(expressions[0].name), 618 expression=self._parse_assignment(), 619 ), 620 } 621 622 COLUMN_OPERATORS = { 623 TokenType.DOT: None, 624 TokenType.DCOLON: lambda self, this, to: self.expression( 625 exp.Cast if self.STRICT_CAST else exp.TryCast, 626 this=this, 627 to=to, 628 ), 629 TokenType.ARROW: lambda self, this, path: self.expression( 630 exp.JSONExtract, 631 this=this, 632 expression=self.dialect.to_json_path(path), 633 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 634 ), 635 TokenType.DARROW: lambda self, this, path: self.expression( 636 exp.JSONExtractScalar, 637 this=this, 638 expression=self.dialect.to_json_path(path), 639 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 640 ), 641 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 642 exp.JSONBExtract, 643 this=this, 644 expression=path, 645 ), 646 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 647 exp.JSONBExtractScalar, 648 this=this, 649 expression=path, 650 ), 651 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 652 exp.JSONBContains, 653 this=this, 654 expression=key, 655 ), 656 } 657 658 EXPRESSION_PARSERS = { 659 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 660 exp.Column: lambda self: self._parse_column(), 661 exp.Condition: lambda self: self._parse_assignment(), 662 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 663 exp.Expression: lambda self: self._parse_expression(), 664 exp.From: lambda self: self._parse_from(joins=True), 665 exp.Group: lambda self: self._parse_group(), 666 exp.Having: lambda self: self._parse_having(), 667 exp.Identifier: lambda self: self._parse_id_var(), 668 exp.Join: lambda self: self._parse_join(), 669 exp.Lambda: lambda self: self._parse_lambda(), 670 exp.Lateral: lambda self: self._parse_lateral(), 671 
exp.Limit: lambda self: self._parse_limit(), 672 exp.Offset: lambda self: self._parse_offset(), 673 exp.Order: lambda self: self._parse_order(), 674 exp.Ordered: lambda self: self._parse_ordered(), 675 exp.Properties: lambda self: self._parse_properties(), 676 exp.Qualify: lambda self: self._parse_qualify(), 677 exp.Returning: lambda self: self._parse_returning(), 678 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 679 exp.Table: lambda self: self._parse_table_parts(), 680 exp.TableAlias: lambda self: self._parse_table_alias(), 681 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 682 exp.Where: lambda self: self._parse_where(), 683 exp.Window: lambda self: self._parse_named_window(), 684 exp.With: lambda self: self._parse_with(), 685 "JOIN_TYPE": lambda self: self._parse_join_parts(), 686 } 687 688 STATEMENT_PARSERS = { 689 TokenType.ALTER: lambda self: self._parse_alter(), 690 TokenType.BEGIN: lambda self: self._parse_transaction(), 691 TokenType.CACHE: lambda self: self._parse_cache(), 692 TokenType.COMMENT: lambda self: self._parse_comment(), 693 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 694 TokenType.COPY: lambda self: self._parse_copy(), 695 TokenType.CREATE: lambda self: self._parse_create(), 696 TokenType.DELETE: lambda self: self._parse_delete(), 697 TokenType.DESC: lambda self: self._parse_describe(), 698 TokenType.DESCRIBE: lambda self: self._parse_describe(), 699 TokenType.DROP: lambda self: self._parse_drop(), 700 TokenType.INSERT: lambda self: self._parse_insert(), 701 TokenType.KILL: lambda self: self._parse_kill(), 702 TokenType.LOAD: lambda self: self._parse_load(), 703 TokenType.MERGE: lambda self: self._parse_merge(), 704 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 705 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 706 TokenType.REFRESH: lambda self: self._parse_refresh(), 707 TokenType.ROLLBACK: lambda self: 
self._parse_commit_or_rollback(), 708 TokenType.SET: lambda self: self._parse_set(), 709 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 710 TokenType.UNCACHE: lambda self: self._parse_uncache(), 711 TokenType.UPDATE: lambda self: self._parse_update(), 712 TokenType.USE: lambda self: self.expression( 713 exp.Use, 714 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 715 this=self._parse_table(schema=False), 716 ), 717 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 718 } 719 720 UNARY_PARSERS = { 721 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 722 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 723 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 724 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 725 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 726 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 727 } 728 729 STRING_PARSERS = { 730 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 731 exp.RawString, this=token.text 732 ), 733 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 734 exp.National, this=token.text 735 ), 736 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 737 TokenType.STRING: lambda self, token: self.expression( 738 exp.Literal, this=token.text, is_string=True 739 ), 740 TokenType.UNICODE_STRING: lambda self, token: self.expression( 741 exp.UnicodeString, 742 this=token.text, 743 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 744 ), 745 } 746 747 NUMERIC_PARSERS = { 748 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 749 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 750 TokenType.HEX_STRING: lambda 
self, token: self.expression(exp.HexString, this=token.text), 751 TokenType.NUMBER: lambda self, token: self.expression( 752 exp.Literal, this=token.text, is_string=False 753 ), 754 } 755 756 PRIMARY_PARSERS = { 757 **STRING_PARSERS, 758 **NUMERIC_PARSERS, 759 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 760 TokenType.NULL: lambda self, _: self.expression(exp.Null), 761 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 762 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 763 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 764 TokenType.STAR: lambda self, _: self.expression( 765 exp.Star, 766 **{ 767 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 768 "replace": self._parse_star_op("REPLACE"), 769 "rename": self._parse_star_op("RENAME"), 770 }, 771 ), 772 } 773 774 PLACEHOLDER_PARSERS = { 775 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 776 TokenType.PARAMETER: lambda self: self._parse_parameter(), 777 TokenType.COLON: lambda self: ( 778 self.expression(exp.Placeholder, this=self._prev.text) 779 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 780 else None 781 ), 782 } 783 784 RANGE_PARSERS = { 785 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 786 TokenType.GLOB: binary_range_parser(exp.Glob), 787 TokenType.ILIKE: binary_range_parser(exp.ILike), 788 TokenType.IN: lambda self, this: self._parse_in(this), 789 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 790 TokenType.IS: lambda self, this: self._parse_is(this), 791 TokenType.LIKE: binary_range_parser(exp.Like), 792 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 793 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 794 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 795 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 796 } 797 798 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 799 
"ALLOWED_VALUES": lambda self: self.expression( 800 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 801 ), 802 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 803 "AUTO": lambda self: self._parse_auto_property(), 804 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 805 "BACKUP": lambda self: self.expression( 806 exp.BackupProperty, this=self._parse_var(any_token=True) 807 ), 808 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 809 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 810 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 811 "CHECKSUM": lambda self: self._parse_checksum(), 812 "CLUSTER BY": lambda self: self._parse_cluster(), 813 "CLUSTERED": lambda self: self._parse_clustered_by(), 814 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 815 exp.CollateProperty, **kwargs 816 ), 817 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 818 "CONTAINS": lambda self: self._parse_contains_property(), 819 "COPY": lambda self: self._parse_copy_property(), 820 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 821 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 822 "DEFINER": lambda self: self._parse_definer(), 823 "DETERMINISTIC": lambda self: self.expression( 824 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 825 ), 826 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 827 "DISTKEY": lambda self: self._parse_distkey(), 828 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 829 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 830 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 831 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 832 "FALLBACK": lambda self, **kwargs: 
self._parse_fallback(**kwargs), 833 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 834 "FREESPACE": lambda self: self._parse_freespace(), 835 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 836 "HEAP": lambda self: self.expression(exp.HeapProperty), 837 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 838 "IMMUTABLE": lambda self: self.expression( 839 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 840 ), 841 "INHERITS": lambda self: self.expression( 842 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 843 ), 844 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 845 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 846 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 847 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 848 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 849 "LIKE": lambda self: self._parse_create_like(), 850 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 851 "LOCK": lambda self: self._parse_locking(), 852 "LOCKING": lambda self: self._parse_locking(), 853 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 854 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 855 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 856 "MODIFIES": lambda self: self._parse_modifies_property(), 857 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 858 "NO": lambda self: self._parse_no_property(), 859 "ON": lambda self: self._parse_on_property(), 860 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 861 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 862 "PARTITION": lambda self: self._parse_partitioned_of(), 863 "PARTITION BY": lambda self: self._parse_partitioned_by(), 864 "PARTITIONED 
BY": lambda self: self._parse_partitioned_by(), 865 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 866 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 867 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 868 "READS": lambda self: self._parse_reads_property(), 869 "REMOTE": lambda self: self._parse_remote_with_connection(), 870 "RETURNS": lambda self: self._parse_returns(), 871 "STRICT": lambda self: self.expression(exp.StrictProperty), 872 "ROW": lambda self: self._parse_row(), 873 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 874 "SAMPLE": lambda self: self.expression( 875 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 876 ), 877 "SECURE": lambda self: self.expression(exp.SecureProperty), 878 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 879 "SETTINGS": lambda self: self.expression( 880 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 881 ), 882 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 883 "SORTKEY": lambda self: self._parse_sortkey(), 884 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 885 "STABLE": lambda self: self.expression( 886 exp.StabilityProperty, this=exp.Literal.string("STABLE") 887 ), 888 "STORED": lambda self: self._parse_stored(), 889 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 890 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 891 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 892 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 893 "TO": lambda self: self._parse_to_table(), 894 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 895 "TRANSFORM": lambda self: self.expression( 896 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 897 ), 898 "TTL": lambda self: self._parse_ttl(), 899 "USING": lambda self: 
self._parse_property_assignment(exp.FileFormatProperty), 900 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 901 "VOLATILE": lambda self: self._parse_volatile_property(), 902 "WITH": lambda self: self._parse_with_property(), 903 } 904 905 CONSTRAINT_PARSERS = { 906 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 907 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 908 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 909 "CHARACTER SET": lambda self: self.expression( 910 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 911 ), 912 "CHECK": lambda self: self.expression( 913 exp.CheckColumnConstraint, 914 this=self._parse_wrapped(self._parse_assignment), 915 enforced=self._match_text_seq("ENFORCED"), 916 ), 917 "COLLATE": lambda self: self.expression( 918 exp.CollateColumnConstraint, this=self._parse_var(any_token=True) 919 ), 920 "COMMENT": lambda self: self.expression( 921 exp.CommentColumnConstraint, this=self._parse_string() 922 ), 923 "COMPRESS": lambda self: self._parse_compress(), 924 "CLUSTERED": lambda self: self.expression( 925 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 926 ), 927 "NONCLUSTERED": lambda self: self.expression( 928 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 929 ), 930 "DEFAULT": lambda self: self.expression( 931 exp.DefaultColumnConstraint, this=self._parse_bitwise() 932 ), 933 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 934 "EPHEMERAL": lambda self: self.expression( 935 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 936 ), 937 "EXCLUDE": lambda self: self.expression( 938 exp.ExcludeColumnConstraint, this=self._parse_index_params() 939 ), 940 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 941 "FORMAT": lambda self: self.expression( 942 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 
943 ), 944 "GENERATED": lambda self: self._parse_generated_as_identity(), 945 "IDENTITY": lambda self: self._parse_auto_increment(), 946 "INLINE": lambda self: self._parse_inline(), 947 "LIKE": lambda self: self._parse_create_like(), 948 "NOT": lambda self: self._parse_not_constraint(), 949 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 950 "ON": lambda self: ( 951 self._match(TokenType.UPDATE) 952 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 953 ) 954 or self.expression(exp.OnProperty, this=self._parse_id_var()), 955 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 956 "PERIOD": lambda self: self._parse_period_for_system_time(), 957 "PRIMARY KEY": lambda self: self._parse_primary_key(), 958 "REFERENCES": lambda self: self._parse_references(match=False), 959 "TITLE": lambda self: self.expression( 960 exp.TitleColumnConstraint, this=self._parse_var_or_string() 961 ), 962 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 963 "UNIQUE": lambda self: self._parse_unique(), 964 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 965 "WITH": lambda self: self.expression( 966 exp.Properties, expressions=self._parse_wrapped_properties() 967 ), 968 } 969 970 ALTER_PARSERS = { 971 "ADD": lambda self: self._parse_alter_table_add(), 972 "ALTER": lambda self: self._parse_alter_table_alter(), 973 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 974 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 975 "DROP": lambda self: self._parse_alter_table_drop(), 976 "RENAME": lambda self: self._parse_alter_table_rename(), 977 "SET": lambda self: self._parse_alter_table_set(), 978 } 979 980 ALTER_ALTER_PARSERS = { 981 "DISTKEY": lambda self: self._parse_alter_diststyle(), 982 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 983 "SORTKEY": lambda self: 
self._parse_alter_sortkey(), 984 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 985 } 986 987 SCHEMA_UNNAMED_CONSTRAINTS = { 988 "CHECK", 989 "EXCLUDE", 990 "FOREIGN KEY", 991 "LIKE", 992 "PERIOD", 993 "PRIMARY KEY", 994 "UNIQUE", 995 } 996 997 NO_PAREN_FUNCTION_PARSERS = { 998 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 999 "CASE": lambda self: self._parse_case(), 1000 "IF": lambda self: self._parse_if(), 1001 "NEXT": lambda self: self._parse_next_value_for(), 1002 } 1003 1004 INVALID_FUNC_NAME_TOKENS = { 1005 TokenType.IDENTIFIER, 1006 TokenType.STRING, 1007 } 1008 1009 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1010 1011 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1012 1013 FUNCTION_PARSERS = { 1014 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1015 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1016 "DECODE": lambda self: self._parse_decode(), 1017 "EXTRACT": lambda self: self._parse_extract(), 1018 "GAP_FILL": lambda self: self._parse_gap_fill(), 1019 "JSON_OBJECT": lambda self: self._parse_json_object(), 1020 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1021 "JSON_TABLE": lambda self: self._parse_json_table(), 1022 "MATCH": lambda self: self._parse_match_against(), 1023 "OPENJSON": lambda self: self._parse_open_json(), 1024 "POSITION": lambda self: self._parse_position(), 1025 "PREDICT": lambda self: self._parse_predict(), 1026 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1027 "STRING_AGG": lambda self: self._parse_string_agg(), 1028 "SUBSTRING": lambda self: self._parse_substring(), 1029 "TRIM": lambda self: self._parse_trim(), 1030 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1031 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1032 } 1033 1034 QUERY_MODIFIER_PARSERS = { 1035 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1036 TokenType.PREWHERE: 
lambda self: ("prewhere", self._parse_prewhere()), 1037 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1038 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1039 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1040 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1041 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1042 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1043 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1044 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1045 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1046 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1047 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1048 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1049 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1050 TokenType.CLUSTER_BY: lambda self: ( 1051 "cluster", 1052 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1053 ), 1054 TokenType.DISTRIBUTE_BY: lambda self: ( 1055 "distribute", 1056 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1057 ), 1058 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1059 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1060 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1061 } 1062 1063 SET_PARSERS = { 1064 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1065 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1066 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1067 "TRANSACTION": lambda self: self._parse_set_transaction(), 1068 } 1069 1070 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1071 1072 TYPE_LITERAL_PARSERS = { 1073 exp.DataType.Type.JSON: lambda self, this, _: 
self.expression(exp.ParseJSON, this=this), 1074 } 1075 1076 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1077 1078 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1079 1080 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1081 1082 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1083 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1084 "ISOLATION": ( 1085 ("LEVEL", "REPEATABLE", "READ"), 1086 ("LEVEL", "READ", "COMMITTED"), 1087 ("LEVEL", "READ", "UNCOMITTED"), 1088 ("LEVEL", "SERIALIZABLE"), 1089 ), 1090 "READ": ("WRITE", "ONLY"), 1091 } 1092 1093 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1094 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1095 ) 1096 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1097 1098 CREATE_SEQUENCE: OPTIONS_TYPE = { 1099 "SCALE": ("EXTEND", "NOEXTEND"), 1100 "SHARD": ("EXTEND", "NOEXTEND"), 1101 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1102 **dict.fromkeys( 1103 ( 1104 "SESSION", 1105 "GLOBAL", 1106 "KEEP", 1107 "NOKEEP", 1108 "ORDER", 1109 "NOORDER", 1110 "NOCACHE", 1111 "CYCLE", 1112 "NOCYCLE", 1113 "NOMINVALUE", 1114 "NOMAXVALUE", 1115 "NOSCALE", 1116 "NOSHARD", 1117 ), 1118 tuple(), 1119 ), 1120 } 1121 1122 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1123 1124 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1125 1126 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1127 1128 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1129 1130 CLONE_KEYWORDS = {"CLONE", "COPY"} 1131 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1132 1133 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1134 1135 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1136 1137 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1138 
    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    # Tokens that may follow FETCH (identifiers, excluding ROW/ROWS/PERCENT keywords)
    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    # Whether CAST is strict (errors) by default; passed into _parse_cast
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a JSON document
    COLON_IS_JSON_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: How parse errors are handled; defaults to ErrorLevel.IMMEDIATE.
            error_message_context: Number of characters of SQL shown around an error location.
            max_errors: Maximum number of error messages concatenated into one ParseError.
            dialect: The dialect (name, class or instance) to resolve via Dialect.get_or_raise.
        """
        # Imported lazily to avoid a circular import with sqlglot.dialects
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clears all parsing state so this instance can parse a new token stream."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag each failure with the type it was attempted against, then try the next one
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Splits the token stream on semicolons and runs `parse_method` once per chunk,
        # producing one syntax tree per SQL statement.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A semicolon that carries comments becomes its own chunk so the
                # comments are not lost
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the parse method stopped before consuming the statement
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The \033[4m / \033[0m escapes underline the offending SQL in terminals
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # If no explicit comments were given, attach any comments buffered from the
        # previously consumed token
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Moves the pending comments (from the last consumed token) onto `expression`
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Returns the slice of the original SQL spanned by the two tokens (inclusive)
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        # True when there is no whitespace between the previous and current tokens
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor `times` tokens forward, refreshing the lookahead/lookbehind state
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Moves the cursor back (or forward) to an absolute token index
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        # Wraps the previous token plus the rest of the statement into an opaque Command node
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error. This behavior can
        be different depending on the user-set ErrorLevel, so _try_parse aims to solve this by setting & resetting
        the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT ON <kind> <target> IS <string>
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # Parses one TTL expression, optionally followed by a DELETE/RECOMPRESS/
            # TO DISK/TO VOLUME action
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Entry point for a single statement: dispatch on the first token, fall back to
        # command parsing, and finally try a bare expression/select
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        # Parses DROP [TEMPORARY] [MATERIALIZED] <kind> [IF EXISTS] <name> ...
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; returns a truthy value only if the full sequence matched
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

1640 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1641 self._advance() 1642 1643 properties = None 1644 create_token = self._match_set(self.CREATABLES) and self._prev 1645 1646 if not create_token: 1647 # exp.Properties.Location.POST_CREATE 1648 properties = self._parse_properties() 1649 create_token = self._match_set(self.CREATABLES) and self._prev 1650 1651 if not properties or not create_token: 1652 return self._parse_as_command(start) 1653 1654 exists = self._parse_exists(not_=True) 1655 this = None 1656 expression: t.Optional[exp.Expression] = None 1657 indexes = None 1658 no_schema_binding = None 1659 begin = None 1660 end = None 1661 clone = None 1662 1663 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1664 nonlocal properties 1665 if properties and temp_props: 1666 properties.expressions.extend(temp_props.expressions) 1667 elif temp_props: 1668 properties = temp_props 1669 1670 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1671 this = self._parse_user_defined_function(kind=create_token.token_type) 1672 1673 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1674 extend_props(self._parse_properties()) 1675 1676 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1677 extend_props(self._parse_properties()) 1678 1679 if not expression: 1680 if self._match(TokenType.COMMAND): 1681 expression = self._parse_as_command(self._prev) 1682 else: 1683 begin = self._match(TokenType.BEGIN) 1684 return_ = self._match_text_seq("RETURN") 1685 1686 if self._match(TokenType.STRING, advance=False): 1687 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1688 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1689 expression = self._parse_string() 1690 extend_props(self._parse_properties()) 1691 else: 1692 expression = self._parse_statement() 1693 1694 end = 
self._match_text_seq("END") 1695 1696 if return_: 1697 expression = self.expression(exp.Return, this=expression) 1698 elif create_token.token_type == TokenType.INDEX: 1699 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1700 if not self._match(TokenType.ON): 1701 index = self._parse_id_var() 1702 anonymous = False 1703 else: 1704 index = None 1705 anonymous = True 1706 1707 this = self._parse_index(index=index, anonymous=anonymous) 1708 elif create_token.token_type in self.DB_CREATABLES: 1709 table_parts = self._parse_table_parts( 1710 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1711 ) 1712 1713 # exp.Properties.Location.POST_NAME 1714 self._match(TokenType.COMMA) 1715 extend_props(self._parse_properties(before=True)) 1716 1717 this = self._parse_schema(this=table_parts) 1718 1719 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1720 extend_props(self._parse_properties()) 1721 1722 self._match(TokenType.ALIAS) 1723 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1724 # exp.Properties.Location.POST_ALIAS 1725 extend_props(self._parse_properties()) 1726 1727 if create_token.token_type == TokenType.SEQUENCE: 1728 expression = self._parse_types() 1729 extend_props(self._parse_properties()) 1730 else: 1731 expression = self._parse_ddl_select() 1732 1733 if create_token.token_type == TokenType.TABLE: 1734 # exp.Properties.Location.POST_EXPRESSION 1735 extend_props(self._parse_properties()) 1736 1737 indexes = [] 1738 while True: 1739 index = self._parse_index() 1740 1741 # exp.Properties.Location.POST_INDEX 1742 extend_props(self._parse_properties()) 1743 1744 if not index: 1745 break 1746 else: 1747 self._match(TokenType.COMMA) 1748 indexes.append(index) 1749 elif create_token.token_type == TokenType.VIEW: 1750 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1751 no_schema_binding = True 1752 1753 shallow = self._match_text_seq("SHALLOW") 1754 1755 if 
self._match_texts(self.CLONE_KEYWORDS): 1756 copy = self._prev.text.lower() == "copy" 1757 clone = self.expression( 1758 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1759 ) 1760 1761 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1762 return self._parse_as_command(start) 1763 1764 return self.expression( 1765 exp.Create, 1766 comments=comments, 1767 this=this, 1768 kind=create_token.text.upper(), 1769 replace=replace, 1770 unique=unique, 1771 expression=expression, 1772 exists=exists, 1773 properties=properties, 1774 indexes=indexes, 1775 no_schema_binding=no_schema_binding, 1776 begin=begin, 1777 end=end, 1778 clone=clone, 1779 ) 1780 1781 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1782 seq = exp.SequenceProperties() 1783 1784 options = [] 1785 index = self._index 1786 1787 while self._curr: 1788 self._match(TokenType.COMMA) 1789 if self._match_text_seq("INCREMENT"): 1790 self._match_text_seq("BY") 1791 self._match_text_seq("=") 1792 seq.set("increment", self._parse_term()) 1793 elif self._match_text_seq("MINVALUE"): 1794 seq.set("minvalue", self._parse_term()) 1795 elif self._match_text_seq("MAXVALUE"): 1796 seq.set("maxvalue", self._parse_term()) 1797 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1798 self._match_text_seq("=") 1799 seq.set("start", self._parse_term()) 1800 elif self._match_text_seq("CACHE"): 1801 # T-SQL allows empty CACHE which is initialized dynamically 1802 seq.set("cache", self._parse_number() or True) 1803 elif self._match_text_seq("OWNED", "BY"): 1804 # "OWNED BY NONE" is the default 1805 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1806 else: 1807 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1808 if opt: 1809 options.append(opt) 1810 else: 1811 break 1812 1813 seq.set("options", options if options else None) 1814 return None if 
self._index == index else seq 1815 1816 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1817 # only used for teradata currently 1818 self._match(TokenType.COMMA) 1819 1820 kwargs = { 1821 "no": self._match_text_seq("NO"), 1822 "dual": self._match_text_seq("DUAL"), 1823 "before": self._match_text_seq("BEFORE"), 1824 "default": self._match_text_seq("DEFAULT"), 1825 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1826 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1827 "after": self._match_text_seq("AFTER"), 1828 "minimum": self._match_texts(("MIN", "MINIMUM")), 1829 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1830 } 1831 1832 if self._match_texts(self.PROPERTY_PARSERS): 1833 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1834 try: 1835 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1836 except TypeError: 1837 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1838 1839 return None 1840 1841 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1842 return self._parse_wrapped_csv(self._parse_property) 1843 1844 def _parse_property(self) -> t.Optional[exp.Expression]: 1845 if self._match_texts(self.PROPERTY_PARSERS): 1846 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1847 1848 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1849 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1850 1851 if self._match_text_seq("COMPOUND", "SORTKEY"): 1852 return self._parse_sortkey(compound=True) 1853 1854 if self._match_text_seq("SQL", "SECURITY"): 1855 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1856 1857 index = self._index 1858 key = self._parse_column() 1859 1860 if not self._match(TokenType.EQ): 1861 self._retreat(index) 1862 return self._parse_sequence_properties() 1863 1864 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1865 
if isinstance(key, exp.Column): 1866 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1867 1868 value = self._parse_bitwise() or self._parse_var(any_token=True) 1869 1870 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1871 if isinstance(value, exp.Column): 1872 value = exp.var(value.name) 1873 1874 return self.expression(exp.Property, this=key, value=value) 1875 1876 def _parse_stored(self) -> exp.FileFormatProperty: 1877 self._match(TokenType.ALIAS) 1878 1879 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1880 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1881 1882 return self.expression( 1883 exp.FileFormatProperty, 1884 this=( 1885 self.expression( 1886 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1887 ) 1888 if input_format or output_format 1889 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1890 ), 1891 ) 1892 1893 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1894 field = self._parse_field() 1895 if isinstance(field, exp.Identifier) and not field.quoted: 1896 field = exp.var(field) 1897 1898 return field 1899 1900 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1901 self._match(TokenType.EQ) 1902 self._match(TokenType.ALIAS) 1903 1904 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1905 1906 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1907 properties = [] 1908 while True: 1909 if before: 1910 prop = self._parse_property_before() 1911 else: 1912 prop = self._parse_property() 1913 if not prop: 1914 break 1915 for p in ensure_list(prop): 1916 properties.append(p) 1917 1918 if properties: 1919 return self.expression(exp.Properties, expressions=properties) 1920 1921 return None 1922 1923 def _parse_fallback(self, no: bool = False) -> 
exp.FallbackProperty: 1924 return self.expression( 1925 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1926 ) 1927 1928 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1929 if self._index >= 2: 1930 pre_volatile_token = self._tokens[self._index - 2] 1931 else: 1932 pre_volatile_token = None 1933 1934 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1935 return exp.VolatileProperty() 1936 1937 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1938 1939 def _parse_retention_period(self) -> exp.Var: 1940 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1941 number = self._parse_number() 1942 number_str = f"{number} " if number else "" 1943 unit = self._parse_var(any_token=True) 1944 return exp.var(f"{number_str}{unit}") 1945 1946 def _parse_system_versioning_property( 1947 self, with_: bool = False 1948 ) -> exp.WithSystemVersioningProperty: 1949 self._match(TokenType.EQ) 1950 prop = self.expression( 1951 exp.WithSystemVersioningProperty, 1952 **{ # type: ignore 1953 "on": True, 1954 "with": with_, 1955 }, 1956 ) 1957 1958 if self._match_text_seq("OFF"): 1959 prop.set("on", False) 1960 return prop 1961 1962 self._match(TokenType.ON) 1963 if self._match(TokenType.L_PAREN): 1964 while self._curr and not self._match(TokenType.R_PAREN): 1965 if self._match_text_seq("HISTORY_TABLE", "="): 1966 prop.set("this", self._parse_table_parts()) 1967 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1968 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1969 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1970 prop.set("retention_period", self._parse_retention_period()) 1971 1972 self._match(TokenType.COMMA) 1973 1974 return prop 1975 1976 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1977 self._match(TokenType.EQ) 1978 on = 
self._match_text_seq("ON") or not self._match_text_seq("OFF") 1979 prop = self.expression(exp.DataDeletionProperty, on=on) 1980 1981 if self._match(TokenType.L_PAREN): 1982 while self._curr and not self._match(TokenType.R_PAREN): 1983 if self._match_text_seq("FILTER_COLUMN", "="): 1984 prop.set("filter_column", self._parse_column()) 1985 elif self._match_text_seq("RETENTION_PERIOD", "="): 1986 prop.set("retention_period", self._parse_retention_period()) 1987 1988 self._match(TokenType.COMMA) 1989 1990 return prop 1991 1992 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1993 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1994 prop = self._parse_system_versioning_property(with_=True) 1995 self._match_r_paren() 1996 return prop 1997 1998 if self._match(TokenType.L_PAREN, advance=False): 1999 return self._parse_wrapped_properties() 2000 2001 if self._match_text_seq("JOURNAL"): 2002 return self._parse_withjournaltable() 2003 2004 if self._match_texts(self.VIEW_ATTRIBUTES): 2005 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2006 2007 if self._match_text_seq("DATA"): 2008 return self._parse_withdata(no=False) 2009 elif self._match_text_seq("NO", "DATA"): 2010 return self._parse_withdata(no=True) 2011 2012 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2013 return self._parse_serde_properties(with_=True) 2014 2015 if not self._next: 2016 return None 2017 2018 return self._parse_withisolatedloading() 2019 2020 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2021 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2022 self._match(TokenType.EQ) 2023 2024 user = self._parse_id_var() 2025 self._match(TokenType.PARAMETER) 2026 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2027 2028 if not user or not host: 2029 return None 2030 2031 return exp.DefinerProperty(this=f"{user}@{host}") 2032 2033 def _parse_withjournaltable(self) -> 
exp.WithJournalTableProperty:
        # Teradata: WITH JOURNAL TABLE = <table>; TABLE and = are optional filler here.
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        # [NO] LOG property; the LOG keyword itself was consumed by the caller.
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        # JOURNAL property; modifier flags (e.g. no/dual/before) arrive via kwargs from the caller.
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # CHECKSUM [=] {ON | OFF | DEFAULT}; `on` stays None when neither ON nor OFF appears.
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        # CLUSTER BY <ordered exprs>, optionally parenthesized when `wrapped` is True.
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        # Hive-style: CLUSTERED BY (<cols>) [SORTED BY (<ordered cols>)] INTO <n> BUCKETS
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # Snowflake: COPY GRANTS. If GRANTS doesn't follow, rewind past COPY and bail.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        # Teradata: FREESPACE [=] <number> [PERCENT]
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        # Teradata: MERGEBLOCKRATIO = <number> [PERCENT], or the flag forms
        # [NO | DEFAULT] MERGEBLOCKRATIO whose flags are passed in by the caller.
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        # Teradata: [MINIMUM | MAXIMUM | DEFAULT] DATABLOCKSIZE [=] <n> [BYTES | KBYTES | KILOBYTES].
        # The default/minimum/maximum flags were consumed by the caller.
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        # Teradata: BLOCKCOMPRESSION [=] {ALWAYS | MANUAL | NEVER | DEFAULT} [AUTOTEMP (<schema>)]
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        # Teradata: WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ...]; rewinds fully on no match.
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        # Teradata LOCKING request modifier:
        # LOCKING {TABLE | VIEW | ROW | DATABASE} [<obj>] {FOR | IN} <lock type> [OVERRIDE]
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            # Only these kinds are followed by the name of the locked object.
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        # PARTITION BY <exprs>; returns an empty list when the clause is absent.
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        # Postgres partition bounds: IN (...), FROM (...) TO (...), or
        # WITH (MODULUS <m>, REMAINDER <r>).
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are bound keywords here, not column references.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            # Hash partitioning: this=modulus, expression=remainder.
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        # PARTITION OF <parent> {DEFAULT | FOR VALUES <bound spec>}; rewinds when OF is absent.
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        # PARTITIONED BY [=] <schema or bracketed field list>.
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # Teradata: WITH [NO] DATA [AND [NO] STATISTICS]; `no` refers to the DATA part.
        if self._match_text_seq("AND", "STATISTICS"):
2287 statistics = True 2288 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2289 statistics = False 2290 else: 2291 statistics = None 2292 2293 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2294 2295 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2296 if self._match_text_seq("SQL"): 2297 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2298 return None 2299 2300 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2301 if self._match_text_seq("SQL", "DATA"): 2302 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2303 return None 2304 2305 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2306 if self._match_text_seq("PRIMARY", "INDEX"): 2307 return exp.NoPrimaryIndexProperty() 2308 if self._match_text_seq("SQL"): 2309 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2310 return None 2311 2312 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2313 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2314 return exp.OnCommitProperty() 2315 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2316 return exp.OnCommitProperty(delete=True) 2317 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2318 2319 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2320 if self._match_text_seq("SQL", "DATA"): 2321 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2322 return None 2323 2324 def _parse_distkey(self) -> exp.DistKeyProperty: 2325 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2326 2327 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2328 table = self._parse_table(schema=True) 2329 2330 options = [] 2331 while self._match_texts(("INCLUDING", "EXCLUDING")): 2332 this = self._prev.text.upper() 2333 2334 id_var = self._parse_id_var() 2335 if not id_var: 2336 return None 
2337 2338 options.append( 2339 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2340 ) 2341 2342 return self.expression(exp.LikeProperty, this=table, expressions=options) 2343 2344 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2345 return self.expression( 2346 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2347 ) 2348 2349 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2350 self._match(TokenType.EQ) 2351 return self.expression( 2352 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2353 ) 2354 2355 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2356 self._match_text_seq("WITH", "CONNECTION") 2357 return self.expression( 2358 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2359 ) 2360 2361 def _parse_returns(self) -> exp.ReturnsProperty: 2362 value: t.Optional[exp.Expression] 2363 null = None 2364 is_table = self._match(TokenType.TABLE) 2365 2366 if is_table: 2367 if self._match(TokenType.LT): 2368 value = self.expression( 2369 exp.Schema, 2370 this="TABLE", 2371 expressions=self._parse_csv(self._parse_struct_types), 2372 ) 2373 if not self._match(TokenType.GT): 2374 self.raise_error("Expecting >") 2375 else: 2376 value = self._parse_schema(exp.var("TABLE")) 2377 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2378 null = True 2379 value = None 2380 else: 2381 value = self._parse_types() 2382 2383 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2384 2385 def _parse_describe(self) -> exp.Describe: 2386 kind = self._match_set(self.CREATABLES) and self._prev.text 2387 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2388 if self._match(TokenType.DOT): 2389 style = None 2390 self._retreat(self._index - 2) 2391 this = self._parse_table(schema=True) 2392 properties = 
self._parse_properties() 2393 expressions = properties.expressions if properties else None 2394 return self.expression( 2395 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2396 ) 2397 2398 def _parse_insert(self) -> exp.Insert: 2399 comments = ensure_list(self._prev_comments) 2400 hint = self._parse_hint() 2401 overwrite = self._match(TokenType.OVERWRITE) 2402 ignore = self._match(TokenType.IGNORE) 2403 local = self._match_text_seq("LOCAL") 2404 alternative = None 2405 is_function = None 2406 2407 if self._match_text_seq("DIRECTORY"): 2408 this: t.Optional[exp.Expression] = self.expression( 2409 exp.Directory, 2410 this=self._parse_var_or_string(), 2411 local=local, 2412 row_format=self._parse_row_format(match_row=True), 2413 ) 2414 else: 2415 if self._match(TokenType.OR): 2416 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2417 2418 self._match(TokenType.INTO) 2419 comments += ensure_list(self._prev_comments) 2420 self._match(TokenType.TABLE) 2421 is_function = self._match(TokenType.FUNCTION) 2422 2423 this = ( 2424 self._parse_table(schema=True, parse_partition=True) 2425 if not is_function 2426 else self._parse_function() 2427 ) 2428 2429 returning = self._parse_returning() 2430 2431 return self.expression( 2432 exp.Insert, 2433 comments=comments, 2434 hint=hint, 2435 is_function=is_function, 2436 this=this, 2437 stored=self._match_text_seq("STORED") and self._parse_stored(), 2438 by_name=self._match_text_seq("BY", "NAME"), 2439 exists=self._parse_exists(), 2440 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2441 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2442 conflict=self._parse_on_conflict(), 2443 returning=returning or self._parse_returning(), 2444 overwrite=overwrite, 2445 alternative=alternative, 2446 ignore=ignore, 2447 ) 2448 2449 def _parse_kill(self) -> exp.Kill: 2450 kind = exp.var(self._prev.text) if 
self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        # ON CONFLICT (Postgres/SQLite) or ON DUPLICATE KEY (MySQL) clause of INSERT.
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                # Conflict target column list, e.g. ON CONFLICT (a, b).
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            # DO UPDATE [SET] a = ..., b = ...
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        # RETURNING <exprs> [INTO <target>].
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # ROW was consumed by the caller; bail if FORMAT doesn't follow.
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        # Hive: [WITH] SERDEPROPERTIES (<k = v, ...>); rewinds fully on no match.
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
2514 **{ # type: ignore 2515 "expressions": self._parse_wrapped_properties(), 2516 "with": with_, 2517 }, 2518 ) 2519 2520 def _parse_row_format( 2521 self, match_row: bool = False 2522 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2523 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2524 return None 2525 2526 if self._match_text_seq("SERDE"): 2527 this = self._parse_string() 2528 2529 serde_properties = self._parse_serde_properties() 2530 2531 return self.expression( 2532 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2533 ) 2534 2535 self._match_text_seq("DELIMITED") 2536 2537 kwargs = {} 2538 2539 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2540 kwargs["fields"] = self._parse_string() 2541 if self._match_text_seq("ESCAPED", "BY"): 2542 kwargs["escaped"] = self._parse_string() 2543 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2544 kwargs["collection_items"] = self._parse_string() 2545 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2546 kwargs["map_keys"] = self._parse_string() 2547 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2548 kwargs["lines"] = self._parse_string() 2549 if self._match_text_seq("NULL", "DEFINED", "AS"): 2550 kwargs["null"] = self._parse_string() 2551 2552 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2553 2554 def _parse_load(self) -> exp.LoadData | exp.Command: 2555 if self._match_text_seq("DATA"): 2556 local = self._match_text_seq("LOCAL") 2557 self._match_text_seq("INPATH") 2558 inpath = self._parse_string() 2559 overwrite = self._match(TokenType.OVERWRITE) 2560 self._match_pair(TokenType.INTO, TokenType.TABLE) 2561 2562 return self.expression( 2563 exp.LoadData, 2564 this=self._parse_table(schema=True), 2565 local=local, 2566 overwrite=overwrite, 2567 inpath=inpath, 2568 partition=self._parse_partition(), 2569 input_format=self._match_text_seq("INPUTFORMAT") and 
self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            # Tokens before FROM are the target table list of a multi-table delete.
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        # UPDATE <table> SET <assignments> [FROM ...] [WHERE ...] [RETURNING ...] [ORDER/LIMIT].
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        # Spark: UNCACHE TABLE [IF EXISTS] <table>.
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        # Spark: CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>].
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options
= [] 2628 if self._match_text_seq("OPTIONS"): 2629 self._match_l_paren() 2630 k = self._parse_string() 2631 self._match(TokenType.EQ) 2632 v = self._parse_string() 2633 options = [k, v] 2634 self._match_r_paren() 2635 2636 self._match(TokenType.ALIAS) 2637 return self.expression( 2638 exp.Cache, 2639 this=table, 2640 lazy=lazy, 2641 options=options, 2642 expression=self._parse_select(nested=True), 2643 ) 2644 2645 def _parse_partition(self) -> t.Optional[exp.Partition]: 2646 if not self._match(TokenType.PARTITION): 2647 return None 2648 2649 return self.expression( 2650 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2651 ) 2652 2653 def _parse_value(self) -> t.Optional[exp.Tuple]: 2654 if self._match(TokenType.L_PAREN): 2655 expressions = self._parse_csv(self._parse_expression) 2656 self._match_r_paren() 2657 return self.expression(exp.Tuple, expressions=expressions) 2658 2659 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2660 expression = self._parse_expression() 2661 if expression: 2662 return self.expression(exp.Tuple, expressions=[expression]) 2663 return None 2664 2665 def _parse_projections(self) -> t.List[exp.Expression]: 2666 return self._parse_expressions() 2667 2668 def _parse_select( 2669 self, 2670 nested: bool = False, 2671 table: bool = False, 2672 parse_subquery_alias: bool = True, 2673 parse_set_operation: bool = True, 2674 ) -> t.Optional[exp.Expression]: 2675 cte = self._parse_with() 2676 2677 if cte: 2678 this = self._parse_statement() 2679 2680 if not this: 2681 self.raise_error("Failed to parse any statement following CTE") 2682 return cte 2683 2684 if "with" in this.arg_types: 2685 this.set("with", cte) 2686 else: 2687 self.raise_error(f"{this.key} does not support CTE") 2688 this = cte 2689 2690 return this 2691 2692 # duckdb supports leading with FROM x 2693 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2694 2695 if 
self._match(TokenType.SELECT): 2696 comments = self._prev_comments 2697 2698 hint = self._parse_hint() 2699 all_ = self._match(TokenType.ALL) 2700 distinct = self._match_set(self.DISTINCT_TOKENS) 2701 2702 kind = ( 2703 self._match(TokenType.ALIAS) 2704 and self._match_texts(("STRUCT", "VALUE")) 2705 and self._prev.text.upper() 2706 ) 2707 2708 if distinct: 2709 distinct = self.expression( 2710 exp.Distinct, 2711 on=self._parse_value() if self._match(TokenType.ON) else None, 2712 ) 2713 2714 if all_ and distinct: 2715 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2716 2717 limit = self._parse_limit(top=True) 2718 projections = self._parse_projections() 2719 2720 this = self.expression( 2721 exp.Select, 2722 kind=kind, 2723 hint=hint, 2724 distinct=distinct, 2725 expressions=projections, 2726 limit=limit, 2727 ) 2728 this.comments = comments 2729 2730 into = self._parse_into() 2731 if into: 2732 this.set("into", into) 2733 2734 if not from_: 2735 from_ = self._parse_from() 2736 2737 if from_: 2738 this.set("from", from_) 2739 2740 this = self._parse_query_modifiers(this) 2741 elif (table or nested) and self._match(TokenType.L_PAREN): 2742 if self._match(TokenType.PIVOT): 2743 this = self._parse_simplified_pivot() 2744 elif self._match(TokenType.FROM): 2745 this = exp.select("*").from_( 2746 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2747 ) 2748 else: 2749 this = ( 2750 self._parse_table() 2751 if table 2752 else self._parse_select(nested=True, parse_set_operation=False) 2753 ) 2754 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2755 2756 self._match_r_paren() 2757 2758 # We return early here so that the UNION isn't attached to the subquery by the 2759 # following call to _parse_set_operations, but instead becomes the parent node 2760 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2761 elif self._match(TokenType.VALUES, advance=False): 2762 this = self._parse_derived_table_values() 2763 
        elif from_:
            # A leading FROM with no SELECT keyword (DuckDB) means SELECT * FROM ...
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        # WITH [RECURSIVE] <cte> [, <cte> ...]; returns None when there is no WITH clause.
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Also consume an optional extra WITH between CTEs (e.g. "..., WITH b AS (...)").
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        # <alias> [(cols)] AS [NOT] [MATERIALIZED] (<statement>)
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # [AS] <identifier> [(col1, col2, ...)]
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Only commit to the parens if they actually held a column list; otherwise rewind.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
return None 2831 2832 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2833 2834 # We bubble up comments from the Identifier to the TableAlias 2835 if isinstance(alias, exp.Identifier): 2836 table_alias.add_comments(alias.pop_comments()) 2837 2838 return table_alias 2839 2840 def _parse_subquery( 2841 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2842 ) -> t.Optional[exp.Subquery]: 2843 if not this: 2844 return None 2845 2846 return self.expression( 2847 exp.Subquery, 2848 this=this, 2849 pivots=self._parse_pivots(), 2850 alias=self._parse_table_alias() if parse_alias else None, 2851 ) 2852 2853 def _implicit_unnests_to_explicit(self, this: E) -> E: 2854 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2855 2856 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2857 for i, join in enumerate(this.args.get("joins") or []): 2858 table = join.this 2859 normalized_table = table.copy() 2860 normalized_table.meta["maybe_column"] = True 2861 normalized_table = _norm(normalized_table, dialect=self.dialect) 2862 2863 if isinstance(table, exp.Table) and not join.args.get("on"): 2864 if normalized_table.parts[0].name in refs: 2865 table_as_column = table.to_column() 2866 unnest = exp.Unnest(expressions=[table_as_column]) 2867 2868 # Table.to_column creates a parent Alias node that we want to convert to 2869 # a TableAlias and attach to the Unnest, so it matches the parser's output 2870 if isinstance(table.args.get("alias"), exp.TableAlias): 2871 table_as_column.replace(table_as_column.this) 2872 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2873 2874 table.replace(unnest) 2875 2876 refs.add(normalized_table.alias_or_name) 2877 2878 return this 2879 2880 def _parse_query_modifiers( 2881 self, this: t.Optional[exp.Expression] 2882 ) -> t.Optional[exp.Expression]: 2883 if isinstance(this, (exp.Query, exp.Table)): 2884 for join in 
self._parse_joins(): 2885 this.append("joins", join) 2886 for lateral in iter(self._parse_lateral, None): 2887 this.append("laterals", lateral) 2888 2889 while True: 2890 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2891 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2892 key, expression = parser(self) 2893 2894 if expression: 2895 this.set(key, expression) 2896 if key == "limit": 2897 offset = expression.args.pop("offset", None) 2898 2899 if offset: 2900 offset = exp.Offset(expression=offset) 2901 this.set("offset", offset) 2902 2903 limit_by_expressions = expression.expressions 2904 expression.set("expressions", None) 2905 offset.set("expressions", limit_by_expressions) 2906 continue 2907 break 2908 2909 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 2910 this = self._implicit_unnests_to_explicit(this) 2911 2912 return this 2913 2914 def _parse_hint(self) -> t.Optional[exp.Hint]: 2915 if self._match(TokenType.HINT): 2916 hints = [] 2917 for hint in iter( 2918 lambda: self._parse_csv( 2919 lambda: self._parse_function() or self._parse_var(upper=True) 2920 ), 2921 [], 2922 ): 2923 hints.extend(hint) 2924 2925 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2926 self.raise_error("Expected */ after HINT") 2927 2928 return self.expression(exp.Hint, expressions=hints) 2929 2930 return None 2931 2932 def _parse_into(self) -> t.Optional[exp.Into]: 2933 if not self._match(TokenType.INTO): 2934 return None 2935 2936 temp = self._match(TokenType.TEMPORARY) 2937 unlogged = self._match_text_seq("UNLOGGED") 2938 self._match(TokenType.TABLE) 2939 2940 return self.expression( 2941 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2942 ) 2943 2944 def _parse_from( 2945 self, joins: bool = False, skip_from_token: bool = False 2946 ) -> t.Optional[exp.From]: 2947 if not skip_from_token and not self._match(TokenType.FROM): 2948 return None 2949 2950 return self.expression( 2951 exp.From, 
    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE clause (row pattern recognition).

        Returns None when the next token is not MATCH_RECOGNIZE; otherwise the
        full parenthesized specification is consumed and an exp.MatchRecognize
        node (with optional trailing alias) is returned.
        """
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        # The ROWS PER MATCH option is captured verbatim as a single variable,
        # e.g. "ALL ROWS PER MATCH SHOW EMPTY MATCHES".
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        # Likewise, the AFTER MATCH SKIP strategy is stored as one variable.
        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                # The following token is the pattern-variable name.
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is regex-like, so instead of parsing it token by
            # token we scan for the balanced closing paren and keep the raw
            # SQL text between `start` and `end`.
            paren = 1
            start = self._curr

            # The body runs at least once (self._curr was verified above), so
            # `end` is always bound after the loop.
            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
(exp.Subquery, exp.Unnest)) and this.alias: 3085 # We move the alias from the lateral's child node to the lateral itself 3086 table_alias = this.args["alias"].pop() 3087 else: 3088 table_alias = self._parse_table_alias() 3089 3090 return self.expression( 3091 exp.Lateral, 3092 this=this, 3093 view=view, 3094 outer=outer, 3095 alias=table_alias, 3096 cross_apply=cross_apply, 3097 ) 3098 3099 def _parse_join_parts( 3100 self, 3101 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3102 return ( 3103 self._match_set(self.JOIN_METHODS) and self._prev, 3104 self._match_set(self.JOIN_SIDES) and self._prev, 3105 self._match_set(self.JOIN_KINDS) and self._prev, 3106 ) 3107 3108 def _parse_join( 3109 self, skip_join_token: bool = False, parse_bracket: bool = False 3110 ) -> t.Optional[exp.Join]: 3111 if self._match(TokenType.COMMA): 3112 return self.expression(exp.Join, this=self._parse_table()) 3113 3114 index = self._index 3115 method, side, kind = self._parse_join_parts() 3116 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3117 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3118 3119 if not skip_join_token and not join: 3120 self._retreat(index) 3121 kind = None 3122 method = None 3123 side = None 3124 3125 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3126 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3127 3128 if not skip_join_token and not join and not outer_apply and not cross_apply: 3129 return None 3130 3131 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3132 3133 if method: 3134 kwargs["method"] = method.text 3135 if side: 3136 kwargs["side"] = side.text 3137 if kind: 3138 kwargs["kind"] = kind.text 3139 if hint: 3140 kwargs["hint"] = hint 3141 3142 if self._match(TokenType.MATCH_CONDITION): 3143 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3144 3145 if 
self._match(TokenType.ON): 3146 kwargs["on"] = self._parse_assignment() 3147 elif self._match(TokenType.USING): 3148 kwargs["using"] = self._parse_wrapped_id_vars() 3149 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3150 kind and kind.token_type == TokenType.CROSS 3151 ): 3152 index = self._index 3153 joins: t.Optional[list] = list(self._parse_joins()) 3154 3155 if joins and self._match(TokenType.ON): 3156 kwargs["on"] = self._parse_assignment() 3157 elif joins and self._match(TokenType.USING): 3158 kwargs["using"] = self._parse_wrapped_id_vars() 3159 else: 3160 joins = None 3161 self._retreat(index) 3162 3163 kwargs["this"].set("joins", joins if joins else None) 3164 3165 comments = [c for token in (method, side, kind) if token for c in token.comments] 3166 return self.expression(exp.Join, comments=comments, **kwargs) 3167 3168 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3169 this = self._parse_assignment() 3170 3171 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3172 return this 3173 3174 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3175 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3176 3177 return this 3178 3179 def _parse_index_params(self) -> exp.IndexParameters: 3180 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3181 3182 if self._match(TokenType.L_PAREN, advance=False): 3183 columns = self._parse_wrapped_csv(self._parse_with_operator) 3184 else: 3185 columns = None 3186 3187 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3188 partition_by = self._parse_partition_by() 3189 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3190 tablespace = ( 3191 self._parse_var(any_token=True) 3192 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3193 else None 3194 ) 3195 where = self._parse_where() 3196 3197 on = self._parse_field() if self._match(TokenType.ON) 
else None 3198 3199 return self.expression( 3200 exp.IndexParameters, 3201 using=using, 3202 columns=columns, 3203 include=include, 3204 partition_by=partition_by, 3205 where=where, 3206 with_storage=with_storage, 3207 tablespace=tablespace, 3208 on=on, 3209 ) 3210 3211 def _parse_index( 3212 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3213 ) -> t.Optional[exp.Index]: 3214 if index or anonymous: 3215 unique = None 3216 primary = None 3217 amp = None 3218 3219 self._match(TokenType.ON) 3220 self._match(TokenType.TABLE) # hive 3221 table = self._parse_table_parts(schema=True) 3222 else: 3223 unique = self._match(TokenType.UNIQUE) 3224 primary = self._match_text_seq("PRIMARY") 3225 amp = self._match_text_seq("AMP") 3226 3227 if not self._match(TokenType.INDEX): 3228 return None 3229 3230 index = self._parse_id_var() 3231 table = None 3232 3233 params = self._parse_index_params() 3234 3235 return self.expression( 3236 exp.Index, 3237 this=index, 3238 table=table, 3239 unique=unique, 3240 primary=primary, 3241 amp=amp, 3242 params=params, 3243 ) 3244 3245 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3246 hints: t.List[exp.Expression] = [] 3247 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3248 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3249 hints.append( 3250 self.expression( 3251 exp.WithTableHint, 3252 expressions=self._parse_csv( 3253 lambda: self._parse_function() or self._parse_var(any_token=True) 3254 ), 3255 ) 3256 ) 3257 self._match_r_paren() 3258 else: 3259 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3260 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3261 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3262 3263 self._match_set((TokenType.INDEX, TokenType.KEY)) 3264 if self._match(TokenType.FOR): 3265 hint.set("target", self._advance_any() and self._prev.text.upper()) 3266 3267 hint.set("expressions", 
    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a (possibly dotted) table reference into an exp.Table.

        Args:
            schema: parse each part as a plain name rather than allowing a
                function call for the first part.
            is_db_reference: the reference names a database, not a table, so
                the parsed parts are shifted one slot (catalog.db, no table).
            wildcard: allow a trailing ``*`` to be folded into the final
                identifier (or to stand alone as the identifier).
        """
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                # Shift everything parsed so far up one level: the old db
                # becomes the catalog and the old table becomes the db.
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            # One more shift: there is no table component at all.
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
            pivots=self._parse_pivots(),
        )
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse one FROM-clause table factor.

        The more specific constructs (LATERAL, UNNEST, VALUES, subquery) are
        tried first; otherwise a plain table name is parsed and the optional
        trailers (partition, version, alias, hints, pivots, sample, joins) are
        attached in a fixed, dialect-aware order.

        Args:
            schema: wrap the result in a schema (column list) node.
            joins: also consume any JOIN clauses that follow the table.
            alias_tokens: tokens allowed to start an alias; defaults to
                self.TABLE_ALIAS_TOKENS.
            parse_bracket: allow a leading bracket construct as the table.
            is_db_reference: the name refers to a database, not a table.
            parse_partition: allow a PARTITION selection after the table name.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            # In these dialects the sample clause comes before the alias.
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample node wraps the table it samples.
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this
with_alias else None 3463 3464 if alias: 3465 if self.dialect.UNNEST_COLUMN_ONLY: 3466 if alias.args.get("columns"): 3467 self.raise_error("Unexpected extra column alias in unnest.") 3468 3469 alias.set("columns", [alias.this]) 3470 alias.set("this", None) 3471 3472 columns = alias.args.get("columns") or [] 3473 if offset and len(expressions) < len(columns): 3474 offset = columns.pop() 3475 3476 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3477 self._match(TokenType.ALIAS) 3478 offset = self._parse_id_var( 3479 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3480 ) or exp.to_identifier("offset") 3481 3482 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3483 3484 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3485 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3486 if not is_derived and not self._match_text_seq("VALUES"): 3487 return None 3488 3489 expressions = self._parse_csv(self._parse_value) 3490 alias = self._parse_table_alias() 3491 3492 if is_derived: 3493 self._match_r_paren() 3494 3495 return self.expression( 3496 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3497 ) 3498 3499 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3500 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3501 as_modifier and self._match_text_seq("USING", "SAMPLE") 3502 ): 3503 return None 3504 3505 bucket_numerator = None 3506 bucket_denominator = None 3507 bucket_field = None 3508 percent = None 3509 size = None 3510 seed = None 3511 3512 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3513 matched_l_paren = self._match(TokenType.L_PAREN) 3514 3515 if self.TABLESAMPLE_CSV: 3516 num = None 3517 expressions = self._parse_csv(self._parse_primary) 3518 else: 3519 expressions = None 3520 num = ( 3521 self._parse_factor() 3522 if self._match(TokenType.NUMBER, advance=False) 3523 else 
self._parse_primary() or self._parse_placeholder() 3524 ) 3525 3526 if self._match_text_seq("BUCKET"): 3527 bucket_numerator = self._parse_number() 3528 self._match_text_seq("OUT", "OF") 3529 bucket_denominator = bucket_denominator = self._parse_number() 3530 self._match(TokenType.ON) 3531 bucket_field = self._parse_field() 3532 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3533 percent = num 3534 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3535 size = num 3536 else: 3537 percent = num 3538 3539 if matched_l_paren: 3540 self._match_r_paren() 3541 3542 if self._match(TokenType.L_PAREN): 3543 method = self._parse_var(upper=True) 3544 seed = self._match(TokenType.COMMA) and self._parse_number() 3545 self._match_r_paren() 3546 elif self._match_texts(("SEED", "REPEATABLE")): 3547 seed = self._parse_wrapped(self._parse_number) 3548 3549 if not method and self.DEFAULT_SAMPLING_METHOD: 3550 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3551 3552 return self.expression( 3553 exp.TableSample, 3554 expressions=expressions, 3555 method=method, 3556 bucket_numerator=bucket_numerator, 3557 bucket_denominator=bucket_denominator, 3558 bucket_field=bucket_field, 3559 percent=percent, 3560 size=size, 3561 seed=seed, 3562 ) 3563 3564 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3565 return list(iter(self._parse_pivot, None)) or None 3566 3567 def _parse_joins(self) -> t.Iterator[exp.Join]: 3568 return iter(self._parse_join, None) 3569 3570 # https://duckdb.org/docs/sql/statements/pivot 3571 def _parse_simplified_pivot(self) -> exp.Pivot: 3572 def _parse_on() -> t.Optional[exp.Expression]: 3573 this = self._parse_bitwise() 3574 return self._parse_in(this) if self._match(TokenType.IN) else this 3575 3576 this = self._parse_table() 3577 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3578 using = self._match(TokenType.USING) and self._parse_csv( 3579 lambda: self._parse_alias(self._parse_function()) 
3580 ) 3581 group = self._parse_group() 3582 return self.expression( 3583 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3584 ) 3585 3586 def _parse_pivot_in(self) -> exp.In: 3587 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3588 this = self._parse_assignment() 3589 3590 self._match(TokenType.ALIAS) 3591 alias = self._parse_field() 3592 if alias: 3593 return self.expression(exp.PivotAlias, this=this, alias=alias) 3594 3595 return this 3596 3597 value = self._parse_column() 3598 3599 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3600 self.raise_error("Expecting IN (") 3601 3602 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3603 3604 self._match_r_paren() 3605 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3606 3607 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3608 index = self._index 3609 include_nulls = None 3610 3611 if self._match(TokenType.PIVOT): 3612 unpivot = False 3613 elif self._match(TokenType.UNPIVOT): 3614 unpivot = True 3615 3616 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3617 if self._match_text_seq("INCLUDE", "NULLS"): 3618 include_nulls = True 3619 elif self._match_text_seq("EXCLUDE", "NULLS"): 3620 include_nulls = False 3621 else: 3622 return None 3623 3624 expressions = [] 3625 3626 if not self._match(TokenType.L_PAREN): 3627 self._retreat(index) 3628 return None 3629 3630 if unpivot: 3631 expressions = self._parse_csv(self._parse_column) 3632 else: 3633 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3634 3635 if not expressions: 3636 self.raise_error("Failed to parse PIVOT's aggregation list") 3637 3638 if not self._match(TokenType.FOR): 3639 self.raise_error("Expecting FOR") 3640 3641 field = self._parse_pivot_in() 3642 3643 self._match_r_paren() 3644 3645 pivot = self.expression( 3646 exp.Pivot, 3647 expressions=expressions, 3648 field=field, 3649 
unpivot=unpivot, 3650 include_nulls=include_nulls, 3651 ) 3652 3653 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3654 pivot.set("alias", self._parse_table_alias()) 3655 3656 if not unpivot: 3657 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3658 3659 columns: t.List[exp.Expression] = [] 3660 for fld in pivot.args["field"].expressions: 3661 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3662 for name in names: 3663 if self.PREFIXED_PIVOT_COLUMNS: 3664 name = f"{name}_{field_name}" if name else field_name 3665 else: 3666 name = f"{field_name}_{name}" if name else field_name 3667 3668 columns.append(exp.to_identifier(name)) 3669 3670 pivot.set("columns", columns) 3671 3672 return pivot 3673 3674 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3675 return [agg.alias for agg in aggregations] 3676 3677 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3678 if not skip_where_token and not self._match(TokenType.PREWHERE): 3679 return None 3680 3681 return self.expression( 3682 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3683 ) 3684 3685 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3686 if not skip_where_token and not self._match(TokenType.WHERE): 3687 return None 3688 3689 return self.expression( 3690 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3691 ) 3692 3693 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3694 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3695 return None 3696 3697 elements: t.Dict[str, t.Any] = defaultdict(list) 3698 3699 if self._match(TokenType.ALL): 3700 elements["all"] = True 3701 elif self._match(TokenType.DISTINCT): 3702 elements["all"] = False 3703 3704 while True: 3705 expressions = self._parse_csv( 3706 lambda: None 3707 if 
self._match(TokenType.ROLLUP, advance=False) 3708 else self._parse_assignment() 3709 ) 3710 if expressions: 3711 elements["expressions"].extend(expressions) 3712 3713 grouping_sets = self._parse_grouping_sets() 3714 if grouping_sets: 3715 elements["grouping_sets"].extend(grouping_sets) 3716 3717 rollup = None 3718 cube = None 3719 totals = None 3720 3721 index = self._index 3722 with_ = self._match(TokenType.WITH) 3723 if self._match(TokenType.ROLLUP): 3724 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3725 elements["rollup"].extend(ensure_list(rollup)) 3726 3727 if self._match(TokenType.CUBE): 3728 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3729 elements["cube"].extend(ensure_list(cube)) 3730 3731 if self._match_text_seq("TOTALS"): 3732 totals = True 3733 elements["totals"] = True # type: ignore 3734 3735 if not (grouping_sets or rollup or cube or totals): 3736 if with_: 3737 self._retreat(index) 3738 break 3739 3740 return self.expression(exp.Group, **elements) # type: ignore 3741 3742 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3743 if not self._match(TokenType.GROUPING_SETS): 3744 return None 3745 3746 return self._parse_wrapped_csv(self._parse_grouping_set) 3747 3748 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3749 if self._match(TokenType.L_PAREN): 3750 grouping_set = self._parse_csv(self._parse_column) 3751 self._match_r_paren() 3752 return self.expression(exp.Tuple, expressions=grouping_set) 3753 3754 return self._parse_column() 3755 3756 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3757 if not skip_having_token and not self._match(TokenType.HAVING): 3758 return None 3759 return self.expression(exp.Having, this=self._parse_assignment()) 3760 3761 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3762 if not self._match(TokenType.QUALIFY): 3763 return None 3764 return self.expression(exp.Qualify, this=self._parse_assignment()) 3765 3766 def 
_parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3767 if skip_start_token: 3768 start = None 3769 elif self._match(TokenType.START_WITH): 3770 start = self._parse_assignment() 3771 else: 3772 return None 3773 3774 self._match(TokenType.CONNECT_BY) 3775 nocycle = self._match_text_seq("NOCYCLE") 3776 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3777 exp.Prior, this=self._parse_bitwise() 3778 ) 3779 connect = self._parse_assignment() 3780 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3781 3782 if not start and self._match(TokenType.START_WITH): 3783 start = self._parse_assignment() 3784 3785 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3786 3787 def _parse_name_as_expression(self) -> exp.Alias: 3788 return self.expression( 3789 exp.Alias, 3790 alias=self._parse_id_var(any_token=True), 3791 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3792 ) 3793 3794 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3795 if self._match_text_seq("INTERPOLATE"): 3796 return self._parse_wrapped_csv(self._parse_name_as_expression) 3797 return None 3798 3799 def _parse_order( 3800 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3801 ) -> t.Optional[exp.Expression]: 3802 siblings = None 3803 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3804 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3805 return this 3806 3807 siblings = True 3808 3809 return self.expression( 3810 exp.Order, 3811 this=this, 3812 expressions=self._parse_csv(self._parse_ordered), 3813 interpolate=self._parse_interpolate(), 3814 siblings=siblings, 3815 ) 3816 3817 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3818 if not self._match(token): 3819 return None 3820 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3821 3822 def _parse_ordered( 3823 self, parse_method: 
    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a LIMIT/TOP clause, or a FETCH clause, attached to `this`.

        Args:
            this: the expression the clause applies to.
            top: parse TOP syntax instead of LIMIT.
            skip_limit_token: assume the LIMIT/TOP keyword was already consumed.

        Returns:
            An exp.Limit or exp.Fetch node, or `this` unchanged when neither
            clause is present.
        """
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                # TOP allows an optional parenthesized expression, e.g. TOP (10).
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # Comma form: the first term parsed is actually the offset.
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing UNION/EXCEPT/INTERSECT clauses into a set-operation tree.

        `this` becomes the left-hand side of the first operation; the loop keeps
        extending the tree (left-deep) while set-operation tokens follow.
        """
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation: t.Type[exp.SetOperation] = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            # DISTINCT is the default unless ALL was given explicitly.
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                # Hoist trailing modifiers (the SET_OP_MODIFIERS args) from the
                # last operand up to the set operation itself.
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this
    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse a chain of EQUALITY-token operators over comparison expressions."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
4045 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4046 if self._match(TokenType.NOTNULL): 4047 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4048 this = self.expression(exp.Not, this=this) 4049 4050 if negate: 4051 this = self.expression(exp.Not, this=this) 4052 4053 if self._match(TokenType.IS): 4054 this = self._parse_is(this) 4055 4056 return this 4057 4058 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4059 index = self._index - 1 4060 negate = self._match(TokenType.NOT) 4061 4062 if self._match_text_seq("DISTINCT", "FROM"): 4063 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4064 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4065 4066 expression = self._parse_null() or self._parse_boolean() 4067 if not expression: 4068 self._retreat(index) 4069 return None 4070 4071 this = self.expression(exp.Is, this=this, expression=expression) 4072 return self.expression(exp.Not, this=this) if negate else this 4073 4074 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4075 unnest = self._parse_unnest(with_alias=False) 4076 if unnest: 4077 this = self.expression(exp.In, this=this, unnest=unnest) 4078 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4079 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4080 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4081 4082 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4083 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4084 else: 4085 this = self.expression(exp.In, this=this, expressions=expressions) 4086 4087 if matched_l_paren: 4088 self._match_r_paren(this) 4089 elif not self._match(TokenType.R_BRACKET, expression=this): 4090 self.raise_error("Expecting ]") 4091 else: 4092 this = self.expression(exp.In, this=this, field=self._parse_field()) 4093 4094 return this 
    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse `<this> BETWEEN <low> AND <high>` (BETWEEN already consumed)."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an Escape node if an ESCAPE clause follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL expression, normalizing it to the `INTERVAL '<n>' <unit>` form."""
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # Bare "interval" was likely an identifier (e.g. `interval IS NULL`), not the keyword
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if len(parts) == 1:
                if unit:
                    # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            # e.g. INTERVAL '1-2' YEAR TO MONTH
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators plus ||, ??, and << / >> (tokenized as LT LT / GT GT)."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplication/division-level operators; tags Div with dialect semantics."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            # A word-operator like DIV with no right operand was probably an identifier
            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse either a typed literal/cast (e.g. DATE '2020-01-01'), or fall back to a column."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0)) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse a single parameter of a parameterized type, e.g. the 38 in DECIMAL(38, 0)."""
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested / parameterized) data type; returns None and retreats on failure."""
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                # Re-tokenize the identifier in case it is actually a known type name
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. AggregateFunction(avg, Float64): first arg is a function/identifier
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # Something like VARCHAR(10) could also be a function call, e.g. in Teradata
            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # Not followed by a string literal, so treat it as a function call, not a type
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )
        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            matched_l_bracket = self._match(TokenType.L_BRACKET)
            if not matched_l_bracket and not matched_array:
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if values and not schema:
                # Outside of a schema context, `type[...]` is a subscript, not an array type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one `name: type` (or bare type) entry inside a STRUCT<...> definition."""
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference together with any trailing operators (e.g. ::, ., [])."""
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # VALUES not followed by ( is an identifier in this dialect
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_json_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse Snowflake-style `col:path.to.key::type` into JSONExtract (+ Casts)."""
        casts = []
        json_path = []

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Use the raw SQL text so quoting/case of the path segment is preserved
                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        if json_path:
            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold postfix column operators (::, ., brackets, dialect-specific ops) onto `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the qualifiers: what was parsed as column.table.db becomes table.db.catalog
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, parenthesized expression/tuple/subquery."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, per ANSI SQL
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a primary, function call, or identifier — order depends on anonymous_func."""
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function invocation, dispatching to registered parsers/builders by name."""
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Remember the original spelling so it can be reproduced on output
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style arguments (aliases, equalities) into PropertyEQ nodes."""
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dotted UDF name with an optional parenthesized parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'...'); bare introducers become identifiers."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `x -> x + 1`), or fall back to an ordinary argument expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all; rewind and parse as a regular argument
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list attached to `this`."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, and trailing constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_assignment(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY(...) | ROW ... | (<expr>)}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (<expr>): the parens wrap an expression, not identity options
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint following NOT: NULL, CASESPECIFIC, or FOR REPLICATION."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
action = "CASCADE" 5142 elif self._match_text_seq("RESTRICT"): 5143 action = "RESTRICT" 5144 elif self._match_pair(TokenType.SET, TokenType.NULL): 5145 action = "SET NULL" 5146 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5147 action = "SET DEFAULT" 5148 else: 5149 self.raise_error("Invalid key constraint") 5150 5151 options.append(f"ON {on} {action}") 5152 elif self._match_text_seq("NOT", "ENFORCED"): 5153 options.append("NOT ENFORCED") 5154 elif self._match_text_seq("DEFERRABLE"): 5155 options.append("DEFERRABLE") 5156 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5157 options.append("INITIALLY DEFERRED") 5158 elif self._match_text_seq("NORELY"): 5159 options.append("NORELY") 5160 elif self._match_text_seq("MATCH", "FULL"): 5161 options.append("MATCH FULL") 5162 else: 5163 break 5164 5165 return options 5166 5167 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5168 if match and not self._match(TokenType.REFERENCES): 5169 return None 5170 5171 expressions = None 5172 this = self._parse_table(schema=True) 5173 options = self._parse_key_constraint_options() 5174 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5175 5176 def _parse_foreign_key(self) -> exp.ForeignKey: 5177 expressions = self._parse_wrapped_id_vars() 5178 reference = self._parse_references() 5179 options = {} 5180 5181 while self._match(TokenType.ON): 5182 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5183 self.raise_error("Expected DELETE or UPDATE") 5184 5185 kind = self._prev.text.lower() 5186 5187 if self._match_text_seq("NO", "ACTION"): 5188 action = "NO ACTION" 5189 elif self._match(TokenType.SET): 5190 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5191 action = "SET " + self._prev.text.upper() 5192 else: 5193 self._advance() 5194 action = self._prev.text.upper() 5195 5196 options[kind] = action 5197 5198 return self.expression( 5199 exp.ForeignKey, 5200 expressions=expressions, 5201 
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        # Hook for dialects that allow more than plain fields in PRIMARY KEY (...).
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        # PERIOD FOR SYSTEM_TIME (<start col>, <end col>); back up if the
        # SYSTEM_TIME token does not follow.
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY either as a column constraint ([ASC|DESC]) or,
        when a column list follows (or `in_props`), as a table-level key."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        # One element inside [...] or {...}: an assignment with optional alias/slice.
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse `[...]` (array literal or subscript on `this`) or `{...}` (struct)."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # `NAME[...]` where NAME is a known array constructor (e.g. ARRAY).
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return self.expression(constructor_type, expressions=expressions)

            # Otherwise it's a subscript; normalize indexes by the dialect offset.
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Support chained subscripts, e.g. x[1][2].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # `<expr> : <expr>` slice syntax.
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [<operand>] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            # `ELSE interval END` can be tokenized so that END is swallowed into
            # an Interval; recover by treating "interval" as a column name.
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        # Function form IF(cond, true[, false]) or keyword form IF ... THEN ... END.
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            # Some dialects treat a statement-leading IF as a command.
            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        # NEXT VALUE FOR <sequence> [OVER (<order>)]; back up if not matched.
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        # EXTRACT(<part> FROM <expr>) or comma-separated variant.
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        # GAP_FILL(TABLE <table>, <args>...)
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse CAST(<expr> AS <type> [FORMAT <fmt>]).

        `strict` chooses exp.Cast vs exp.TryCast; `safe` suppresses the
        FORMAT -> StrToDate/StrToTime rewrite for temporal targets.
        """
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two-arg form casts to a type given as a string literal.
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            if not safe and to.this in exp.DataType.TEMPORAL_TYPES:
                # Rewrite CAST ... FORMAT into StrToDate/StrToTime, translating
                # the format string via the dialect's time-format mappings.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type names become user-defined types.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style calls into exp.GroupConcat."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        # CONVERT(<expr> USING <charset>) or CONVERT(<expr>, <type>).
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result) arguments; a trailing unpaired arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: equal, or both NULL (DECODE treats NULL = NULL as a match).
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        # [KEY] <key> {: | VALUE} <value>
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wrap `this` when followed by FORMAT JSON.
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)
5517 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5518 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5519 for value in values: 5520 if self._match_text_seq(value, "ON", on): 5521 return f"{value} ON {on}" 5522 5523 return None 5524 5525 @t.overload 5526 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5527 5528 @t.overload 5529 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 5530 5531 def _parse_json_object(self, agg=False): 5532 star = self._parse_star() 5533 expressions = ( 5534 [star] 5535 if star 5536 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5537 ) 5538 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5539 5540 unique_keys = None 5541 if self._match_text_seq("WITH", "UNIQUE"): 5542 unique_keys = True 5543 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5544 unique_keys = False 5545 5546 self._match_text_seq("KEYS") 5547 5548 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5549 self._parse_type() 5550 ) 5551 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5552 5553 return self.expression( 5554 exp.JSONObjectAgg if agg else exp.JSONObject, 5555 expressions=expressions, 5556 null_handling=null_handling, 5557 unique_keys=unique_keys, 5558 return_type=return_type, 5559 encoding=encoding, 5560 ) 5561 5562 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5563 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5564 if not self._match_text_seq("NESTED"): 5565 this = self._parse_id_var() 5566 kind = self._parse_types(allow_identifiers=False) 5567 nested = None 5568 else: 5569 this = None 5570 kind = None 5571 nested = True 5572 5573 path = self._match_text_seq("PATH") and self._parse_string() 5574 nested_schema = nested and self._parse_json_schema() 5575 5576 return self.expression( 5577 exp.JSONColumnDef, 5578 this=this, 5579 kind=kind, 
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        # COLUMNS (<column def>, ...)
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        # JSON_TABLE(<doc>[, <path>] [ERROR/NULL ON ERROR|EMPTY] COLUMNS(...))
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        # MySQL full-text search: MATCH (<cols>) AGAINST (<expr> [<modifier>]).
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # `<name> <type> '<path>' [AS JSON]`
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        # POSITION(<substr> IN <str>) or comma-separated POSITION/LOCATE args;
        # `haystack_first` flips the argument order for dialects like LOCATE.
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        # ML.PREDICT(MODEL <model>, TABLE <table> [, <params struct>])
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        # Hint function call, e.g. BROADCAST(t1, t2).
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
args.append(self._parse_bitwise()) 5695 if self._match(TokenType.FOR): 5696 if len(args) == 1: 5697 args.append(exp.Literal.number(1)) 5698 args.append(self._parse_bitwise()) 5699 5700 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5701 5702 def _parse_trim(self) -> exp.Trim: 5703 # https://www.w3resource.com/sql/character-functions/trim.php 5704 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5705 5706 position = None 5707 collation = None 5708 expression = None 5709 5710 if self._match_texts(self.TRIM_TYPES): 5711 position = self._prev.text.upper() 5712 5713 this = self._parse_bitwise() 5714 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5715 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5716 expression = self._parse_bitwise() 5717 5718 if invert_order: 5719 this, expression = expression, this 5720 5721 if self._match(TokenType.COLLATE): 5722 collation = self._parse_bitwise() 5723 5724 return self.expression( 5725 exp.Trim, this=this, position=position, expression=expression, collation=collation 5726 ) 5727 5728 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5729 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5730 5731 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5732 return self._parse_window(self._parse_id_var(), alias=True) 5733 5734 def _parse_respect_or_ignore_nulls( 5735 self, this: t.Optional[exp.Expression] 5736 ) -> t.Optional[exp.Expression]: 5737 if self._match_text_seq("IGNORE", "NULLS"): 5738 return self.expression(exp.IgnoreNulls, this=this) 5739 if self._match_text_seq("RESPECT", "NULLS"): 5740 return self.expression(exp.RespectNulls, this=this) 5741 return this 5742 5743 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5744 if self._match(TokenType.HAVING): 5745 self._match_texts(("MAX", "MIN")) 5746 max = self._prev.text.upper() != "MIN" 5747 
return self.expression( 5748 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5749 ) 5750 5751 return this 5752 5753 def _parse_window( 5754 self, this: t.Optional[exp.Expression], alias: bool = False 5755 ) -> t.Optional[exp.Expression]: 5756 func = this 5757 comments = func.comments if isinstance(func, exp.Expression) else None 5758 5759 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5760 self._match(TokenType.WHERE) 5761 this = self.expression( 5762 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5763 ) 5764 self._match_r_paren() 5765 5766 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5767 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5768 if self._match_text_seq("WITHIN", "GROUP"): 5769 order = self._parse_wrapped(self._parse_order) 5770 this = self.expression(exp.WithinGroup, this=this, expression=order) 5771 5772 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5773 # Some dialects choose to implement and some do not. 5774 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5775 5776 # There is some code above in _parse_lambda that handles 5777 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5778 5779 # The below changes handle 5780 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
5781 5782 # Oracle allows both formats 5783 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5784 # and Snowflake chose to do the same for familiarity 5785 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5786 if isinstance(this, exp.AggFunc): 5787 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5788 5789 if ignore_respect and ignore_respect is not this: 5790 ignore_respect.replace(ignore_respect.this) 5791 this = self.expression(ignore_respect.__class__, this=this) 5792 5793 this = self._parse_respect_or_ignore_nulls(this) 5794 5795 # bigquery select from window x AS (partition by ...) 5796 if alias: 5797 over = None 5798 self._match(TokenType.ALIAS) 5799 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5800 return this 5801 else: 5802 over = self._prev.text.upper() 5803 5804 if comments and isinstance(func, exp.Expression): 5805 func.pop_comments() 5806 5807 if not self._match(TokenType.L_PAREN): 5808 return self.expression( 5809 exp.Window, 5810 comments=comments, 5811 this=this, 5812 alias=self._parse_id_var(False), 5813 over=over, 5814 ) 5815 5816 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5817 5818 first = self._match(TokenType.FIRST) 5819 if self._match_text_seq("LAST"): 5820 first = False 5821 5822 partition, order = self._parse_partition_and_order() 5823 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5824 5825 if kind: 5826 self._match(TokenType.BETWEEN) 5827 start = self._parse_window_spec() 5828 self._match(TokenType.AND) 5829 end = self._parse_window_spec() 5830 5831 spec = self.expression( 5832 exp.WindowSpec, 5833 kind=kind, 5834 start=start["value"], 5835 start_side=start["side"], 5836 end=end["value"], 5837 end_side=end["side"], 5838 ) 5839 else: 5840 spec = None 5841 5842 self._match_r_paren() 5843 5844 window = self.expression( 5845 exp.Window, 5846 comments=comments, 
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        # One frame endpoint: UNBOUNDED / CURRENT ROW / <expr>, plus an
        # optional side (PRECEDING / FOLLOWING).
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias on `this`; with `explicit=True` only an
        AS-prefixed alias is accepted."""
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multi-alias form: <expr> AS (a, b, ...)
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        # An identifier, or any token (when allowed) / token in `tokens`
        # treated as an unquoted identifier.
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        # A string literal used in identifier position becomes a quoted identifier.
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        # A VAR token — or any token / one of `tokens` when allowed — as exp.Var.
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        # Consume the current token unless it is reserved; return it, else None.
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        # Dispatch to a placeholder parser; rewind if it matches the token but
        # ultimately produces nothing.
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        # `<keyword> (<exprs>)` or `<keyword> <expr>`, e.g. EXCEPT/REPLACE on SELECT *.
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self,
parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6013 ) -> t.List[exp.Expression]: 6014 parse_result = parse_method() 6015 items = [parse_result] if parse_result is not None else [] 6016 6017 while self._match(sep): 6018 self._add_comments(parse_result) 6019 parse_result = parse_method() 6020 if parse_result is not None: 6021 items.append(parse_result) 6022 6023 return items 6024 6025 def _parse_tokens( 6026 self, parse_method: t.Callable, expressions: t.Dict 6027 ) -> t.Optional[exp.Expression]: 6028 this = parse_method() 6029 6030 while self._match_set(expressions): 6031 this = self.expression( 6032 expressions[self._prev.token_type], 6033 this=this, 6034 comments=self._prev_comments, 6035 expression=parse_method(), 6036 ) 6037 6038 return this 6039 6040 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6041 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6042 6043 def _parse_wrapped_csv( 6044 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6045 ) -> t.List[exp.Expression]: 6046 return self._parse_wrapped( 6047 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6048 ) 6049 6050 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6051 wrapped = self._match(TokenType.L_PAREN) 6052 if not wrapped and not optional: 6053 self.raise_error("Expecting (") 6054 parse_result = parse_method() 6055 if wrapped: 6056 self._match_r_paren() 6057 return parse_result 6058 6059 def _parse_expressions(self) -> t.List[exp.Expression]: 6060 return self._parse_csv(self._parse_expression) 6061 6062 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6063 return self._parse_select() or self._parse_set_operations( 6064 self._parse_expression() if alias else self._parse_assignment() 6065 ) 6066 6067 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6068 return self._parse_query_modifiers( 
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        # BEGIN/START [<kind>] [TRANSACTION | WORK] [<mode> [, <mode> ...]]
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # A single mode can span several VAR tokens (multi-word modes).
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        # COMMIT/ROLLBACK [TRANSACTION | WORK] [TO [SAVEPOINT] <name>] [AND [NO] CHAIN]
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        # ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER <col>]
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        # Fallback: ALTER COLUMN ... [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options",
                self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        # Falls back to an opaque exp.Command whenever the statement cannot be
        # fully parsed (unknown action, or leftover tokens after parsing).
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        # MERGE INTO <target> [AS alias] USING <source> ON <condition> WHEN ...
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        # WHEN [NOT] MATCHED [BY TARGET | BY SOURCE] [AND <cond>] THEN
        # INSERT ... | UPDATE ... | DELETE, repeated.
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        # SET [GLOBAL | SESSION] <name> {= | TO} <value>; bare identifiers on
        # the right-hand side are normalized into exp.Var.
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        # Falls back to exp.Command when tokens remain after parsing the items.
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        # Matches a (possibly multi-word) option from `options`, which maps a
        # leading keyword to its allowed continuations.
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched: either the leading keyword itself was
            # unknown (continuations is None) or a continuation was required.
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        # Consumes all remaining tokens and stores the raw SQL as exp.Command.
        while self._curr:
            self._advance()

        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        # (MIN <a> MAX <b>) or (MAX <b>) — the latter implies MIN 0.
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        # <this> FOR <expr> IN <iterator> [IF <condition>]; retreats when the
        # IN keyword is absent, since this is not a comprehension then.
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self)
 -> t.Optional[exp.Heredoc]:
        # Dollar-quoted (heredoc) strings: $$...$$ or $tag$...$tag$.
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        # Greedily matches the longest keyword sequence found in `trie` and
        # returns the corresponding parser; retreats fully on failure.
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(
        self,
        token_type: TokenType,
        advance: bool = True,
        expression: t.Optional[exp.Expression] = None,
    ) -> t.Optional[bool]:
        # Returns True and (optionally) advances when the current token matches
        # `token_type`; attaches pending comments to `expression` if given.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(
        self, types: t.Collection[TokenType], advance: bool = True
    ) -> t.Optional[bool]:
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(
        self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True
    ) -> t.Optional[bool]:
        # Matches two consecutive tokens; advances past both when they match.
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts: t.Collection[str], advance: bool = True) -> t.Optional[bool]:
        # Case-insensitive match of the current token's text against `texts`.
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts: str, advance: bool = True) -> t.Optional[bool]:
        # Case-insensitive match of a sequence of token texts; fully retreats
        # on a partial match (and also when advance=False).
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        # Rewrites columns in a lambda body that refer to the lambda's
        # parameters (`expressions`), optionally casting them to the
        # parameter's declared type.
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        # Parses [=] ( <property> [, <property>] ... ) option lists.
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        # COPY [INTO] <target> {FROM | TO} <files> [credentials] [WITH] (<params>)
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind is True for COPY FROM (load), False for COPY TO (unload).
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1238 def __init__( 1239 self, 1240 error_level: t.Optional[ErrorLevel] = None, 1241 error_message_context: int = 100, 1242 max_errors: int = 3, 1243 dialect: DialectType = None, 1244 ): 1245 from sqlglot.dialects import Dialect 1246 1247 self.error_level = error_level or ErrorLevel.IMMEDIATE 1248 self.error_message_context = error_message_context 1249 self.max_errors = max_errors 1250 self.dialect = Dialect.get_or_raise(dialect) 1251 self.reset()
1263 def parse( 1264 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1265 ) -> t.List[t.Optional[exp.Expression]]: 1266 """ 1267 Parses a list of tokens and returns a list of syntax trees, one tree 1268 per parsed SQL statement. 1269 1270 Args: 1271 raw_tokens: The list of tokens. 1272 sql: The original SQL string, used to produce helpful debug messages. 1273 1274 Returns: 1275 The list of the produced syntax trees. 1276 """ 1277 return self._parse( 1278 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1279 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1281 def parse_into( 1282 self, 1283 expression_types: exp.IntoType, 1284 raw_tokens: t.List[Token], 1285 sql: t.Optional[str] = None, 1286 ) -> t.List[t.Optional[exp.Expression]]: 1287 """ 1288 Parses a list of tokens into a given Expression type. If a collection of Expression 1289 types is given instead, this method will try to parse the token list into each one 1290 of them, stopping at the first for which the parsing succeeds. 1291 1292 Args: 1293 expression_types: The expression type(s) to try and parse the token list into. 1294 raw_tokens: The list of tokens. 1295 sql: The original SQL string, used to produce helpful debug messages. 1296 1297 Returns: 1298 The target Expression. 1299 """ 1300 errors = [] 1301 for expression_type in ensure_list(expression_types): 1302 parser = self.EXPRESSION_PARSERS.get(expression_type) 1303 if not parser: 1304 raise TypeError(f"No parser registered for {expression_type}") 1305 1306 try: 1307 return self._parse(parser, raw_tokens, sql) 1308 except ParseError as e: 1309 e.errors[0]["into_expression"] = expression_type 1310 errors.append(e) 1311 1312 raise ParseError( 1313 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1314 errors=merge_errors(errors), 1315 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        # WARN: report every recorded error through the module logger.
        for recorded in self.errors:
            logger.error(str(recorded))
        return

    if self.error_level == ErrorLevel.RAISE and self.errors:
        # RAISE: surface all recorded errors as a single ParseError.
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    # Anchor the error at the given token, falling back to the current or
    # previous token, or an empty token if parsing hasn't started yet.
    anchor = token or self._curr or self._prev or Token.string("")
    start = anchor.start
    end = anchor.end + 1

    before = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    after = self.sql[end : end + self.error_message_context]

    # The offending snippet is underlined with ANSI escape codes.
    error = ParseError.new(
        f"{message}. Line {anchor.line}, Col: {anchor.col}.\n"
        f"  {before}\033[4m{highlight}\033[0m{after}",
        description=message,
        line=anchor.line,
        col=anchor.col,
        start_context=before,
        highlight=highlight,
        end_context=after,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)

    # Idiom fix: the original used a conditional expression purely for its
    # side effects; an explicit branch makes the intent clear.
    if comments:
        instance.add_comments(comments)
    else:
        # No explicit comments given: attach whatever the parser has pending.
        self._add_comments(instance)

    return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    # Guard clause: under ErrorLevel.IGNORE validation is skipped entirely.
    if self.error_level == ErrorLevel.IGNORE:
        return expression

    for error_message in expression.error_messages(args):
        self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.