sqlglot.dialects.snowflake
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, parser, tokens, transforms 6from sqlglot.dialects.dialect import ( 7 Dialect, 8 NormalizationStrategy, 9 binary_from_function, 10 build_default_decimal_type, 11 build_timestamp_from_parts, 12 date_delta_sql, 13 date_trunc_to_time, 14 datestrtodate_sql, 15 build_formatted_time, 16 if_sql, 17 inline_array_sql, 18 max_or_greatest, 19 min_or_least, 20 rename_func, 21 timestamptrunc_sql, 22 timestrtotime_sql, 23 var_map_sql, 24 map_date_part, 25) 26from sqlglot.helper import flatten, is_float, is_int, seq_get 27from sqlglot.tokens import TokenType 28 29if t.TYPE_CHECKING: 30 from sqlglot._typing import E 31 32 33# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html 34def _build_datetime( 35 name: str, kind: exp.DataType.Type, safe: bool = False 36) -> t.Callable[[t.List], exp.Func]: 37 def _builder(args: t.List) -> exp.Func: 38 value = seq_get(args, 0) 39 int_value = value is not None and is_int(value.name) 40 41 if isinstance(value, exp.Literal): 42 # Converts calls like `TO_TIME('01:02:03')` into casts 43 if len(args) == 1 and value.is_string and not int_value: 44 return exp.cast(value, kind) 45 46 # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special 47 # cases so we can transpile them, since they're relatively common 48 if kind == exp.DataType.Type.TIMESTAMP: 49 if int_value: 50 return exp.UnixToTime(this=value, scale=seq_get(args, 1)) 51 if not is_float(value.this): 52 return build_formatted_time(exp.StrToTime, "snowflake")(args) 53 54 if kind == exp.DataType.Type.DATE and not int_value: 55 formatted_exp = build_formatted_time(exp.TsOrDsToDate, "snowflake")(args) 56 formatted_exp.set("safe", safe) 57 return formatted_exp 58 59 return exp.Anonymous(this=name, expressions=args) 60 61 return _builder 62 63 64def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]: 65 expression = parser.build_var_map(args) 66 67 if isinstance(expression, exp.StarMap): 68 return expression 69 70 return exp.Struct( 71 expressions=[ 72 exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values) 73 ] 74 ) 75 76 77def _build_datediff(args: t.List) -> exp.DateDiff: 78 return exp.DateDiff( 79 this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0)) 80 ) 81 82 83def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]: 84 def _builder(args: t.List) -> E: 85 return expr_type( 86 this=seq_get(args, 2), 87 expression=seq_get(args, 1), 88 unit=map_date_part(seq_get(args, 0)), 89 ) 90 91 return _builder 92 93 94# https://docs.snowflake.com/en/sql-reference/functions/div0 95def _build_if_from_div0(args: t.List) -> exp.If: 96 cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0)) 97 true = exp.Literal.number(0) 98 false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1)) 99 return exp.If(this=cond, true=true, false=false) 100 101 102# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull 103def _build_if_from_zeroifnull(args: t.List) -> exp.If: 104 cond = exp.Is(this=seq_get(args, 0), expression=exp.Null()) 105 return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0)) 106 107 108# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull 109def _build_if_from_nullifzero(args: t.List) -> exp.If: 110 cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0)) 111 return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0)) 112 113 114def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str: 115 flag = expression.text("flag") 116 117 if "i" not in flag: 118 flag += "i" 119 120 return self.func( 121 "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag) 122 ) 123 124 125def _build_convert_timezone(args: t.List) -> t.Union[exp.Anonymous, exp.AtTimeZone]: 126 if len(args) == 3: 127 return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args) 128 return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0)) 129 130 131def _build_regexp_replace(args: t.List) -> exp.RegexpReplace: 132 regexp_replace = exp.RegexpReplace.from_arg_list(args) 133 134 if not regexp_replace.args.get("replacement"): 135 regexp_replace.set("replacement", exp.Literal.string("")) 136 137 return regexp_replace 138 139 140def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]: 141 def _parse(self: Snowflake.Parser) -> exp.Show: 142 return self._parse_show_snowflake(*args, **kwargs) 143 144 return _parse 145 146 147def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc: 148 trunc = date_trunc_to_time(args) 149 trunc.set("unit", map_date_part(trunc.args["unit"])) 150 return trunc 151 152 153def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression: 154 """ 155 Snowflake doesn't allow columns referenced in UNPIVOT to be qualified, 156 so we need to unqualify them. 157 158 Example: 159 >>> from sqlglot import parse_one 160 >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))") 161 >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake")) 162 SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april)) 163 """ 164 if isinstance(expression, exp.Pivot) and expression.unpivot: 165 expression = transforms.unqualify_columns(expression) 166 167 return expression 168 169 170def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression: 171 assert isinstance(expression, exp.Create) 172 173 def _flatten_structured_type(expression: exp.DataType) -> exp.DataType: 174 if expression.this in exp.DataType.NESTED_TYPES: 175 expression.set("expressions", None) 176 return expression 177 178 props = expression.args.get("properties") 179 if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)): 180 for schema_expression in expression.this.expressions: 181 if isinstance(schema_expression, exp.ColumnDef): 182 column_type = schema_expression.kind 183 if isinstance(column_type, exp.DataType): 184 column_type.transform(_flatten_structured_type, copy=False) 185 186 return expression 187 188 189class Snowflake(Dialect): 190 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 191 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 192 NULL_ORDERING = "nulls_are_large" 193 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 194 SUPPORTS_USER_DEFINED_TYPES = False 195 SUPPORTS_SEMI_ANTI_JOIN = False 196 PREFER_CTE_ALIAS_COLUMN = True 197 TABLESAMPLE_SIZE_IS_PERCENT = True 198 COPY_PARAMS_ARE_CSV = False 199 200 TIME_MAPPING = { 201 "YYYY": "%Y", 202 "yyyy": "%Y", 203 "YY": "%y", 204 "yy": "%y", 205 "MMMM": "%B", 206 "mmmm": "%B", 207 "MON": "%b", 208 "mon": "%b", 209 "MM": "%m", 210 "mm": "%m", 211 "DD": "%d", 212 "dd": "%-d", 213 "DY": "%a", 214 "dy": "%w", 215 "HH24": "%H", 216 "hh24": "%H", 217 "HH12": "%I", 218 "hh12": "%I", 219 "MI": "%M", 220 "mi": "%M", 221 "SS": "%S", 222 "ss": "%S", 223 "FF": "%f", 224 "ff": "%f", 225 "FF6": "%f", 226 "ff6": "%f", 227 } 228 229 def quote_identifier(self, expression: E, identify: bool = True) -> E: 230 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 231 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 232 if ( 233 isinstance(expression, exp.Identifier) 234 and isinstance(expression.parent, exp.Table) 235 and expression.name.lower() == "dual" 236 ): 237 return expression # type: ignore 238 239 return super().quote_identifier(expression, identify=identify) 240 241 class Parser(parser.Parser): 242 IDENTIFY_PIVOT_STRINGS = True 243 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 244 COLON_IS_JSON_EXTRACT = True 245 246 ID_VAR_TOKENS = { 247 *parser.Parser.ID_VAR_TOKENS, 248 TokenType.MATCH_CONDITION, 249 } 250 251 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 252 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 253 254 FUNCTIONS = { 255 **parser.Parser.FUNCTIONS, 256 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 257 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 258 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 259 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 260 this=seq_get(args, 1), expression=seq_get(args, 0) 261 ), 262 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 263 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 264 start=seq_get(args, 0), 265 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 266 step=seq_get(args, 2), 267 ), 268 "BITXOR": binary_from_function(exp.BitwiseXor), 269 "BIT_XOR": binary_from_function(exp.BitwiseXor), 270 "BOOLXOR": binary_from_function(exp.Xor), 271 "CONVERT_TIMEZONE": _build_convert_timezone, 272 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 273 "DATE_TRUNC": _date_trunc_to_time, 274 "DATEADD": _build_date_time_add(exp.DateAdd), 275 "DATEDIFF": _build_datediff, 276 "DIV0": _build_if_from_div0, 277 "FLATTEN": exp.Explode.from_arg_list, 278 "GET_PATH": lambda args, dialect: exp.JSONExtract( 279 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 280 ), 281 "IFF": exp.If.from_arg_list, 282 "LAST_DAY": lambda args: exp.LastDay( 283 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 284 ), 285 "LISTAGG": exp.GroupConcat.from_arg_list, 286 "MEDIAN": lambda args: exp.PercentileCont( 287 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 288 ), 289 "NULLIFZERO": _build_if_from_nullifzero, 290 "OBJECT_CONSTRUCT": _build_object_construct, 291 "REGEXP_REPLACE": _build_regexp_replace, 292 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 293 "RLIKE": exp.RegexpLike.from_arg_list, 294 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 295 "TIMEADD": _build_date_time_add(exp.TimeAdd), 296 "TIMEDIFF": _build_datediff, 297 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 298 "TIMESTAMPDIFF": _build_datediff, 299 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 300 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 301 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 302 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 303 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 304 "TO_NUMBER": lambda args: exp.ToNumber( 305 this=seq_get(args, 0), 306 format=seq_get(args, 1), 307 precision=seq_get(args, 2), 308 scale=seq_get(args, 3), 309 ), 310 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 311 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 312 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 313 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 314 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 315 "TO_VARCHAR": exp.ToChar.from_arg_list, 316 "ZEROIFNULL": _build_if_from_zeroifnull, 317 } 318 319 FUNCTION_PARSERS = { 320 **parser.Parser.FUNCTION_PARSERS, 321 "DATE_PART": lambda self: self._parse_date_part(), 322 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 323 } 324 FUNCTION_PARSERS.pop("TRIM") 325 326 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 327 328 RANGE_PARSERS = { 329 **parser.Parser.RANGE_PARSERS, 330 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 331 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 332 } 333 334 ALTER_PARSERS = { 335 **parser.Parser.ALTER_PARSERS, 336 "UNSET": lambda self: self.expression( 337 exp.Set, 338 tag=self._match_text_seq("TAG"), 339 expressions=self._parse_csv(self._parse_id_var), 340 unset=True, 341 ), 342 "SWAP": lambda self: self._parse_alter_table_swap(), 343 } 344 345 STATEMENT_PARSERS = { 346 **parser.Parser.STATEMENT_PARSERS, 347 TokenType.SHOW: lambda self: self._parse_show(), 348 } 349 350 PROPERTY_PARSERS = { 351 **parser.Parser.PROPERTY_PARSERS, 352 "LOCATION": lambda self: self._parse_location_property(), 353 } 354 355 TYPE_CONVERTERS = { 356 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 357 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 358 } 359 360 SHOW_PARSERS = { 361 "SCHEMAS": _show_parser("SCHEMAS"), 362 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 363 "OBJECTS": _show_parser("OBJECTS"), 364 "TERSE OBJECTS": _show_parser("OBJECTS"), 365 "TABLES": _show_parser("TABLES"), 366 "TERSE TABLES": _show_parser("TABLES"), 367 "VIEWS": _show_parser("VIEWS"), 368 "TERSE VIEWS": _show_parser("VIEWS"), 369 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 370 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 371 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 372 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 373 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 374 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 375 "SEQUENCES": _show_parser("SEQUENCES"), 376 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 377 "COLUMNS": _show_parser("COLUMNS"), 378 "USERS": _show_parser("USERS"), 379 "TERSE USERS": _show_parser("USERS"), 380 } 381 382 CONSTRAINT_PARSERS = { 383 **parser.Parser.CONSTRAINT_PARSERS, 384 "WITH": lambda self: self._parse_with_constraint(), 385 "MASKING": lambda self: self._parse_with_constraint(), 386 "PROJECTION": lambda self: self._parse_with_constraint(), 387 "TAG": lambda self: self._parse_with_constraint(), 388 } 389 390 STAGED_FILE_SINGLE_TOKENS = { 391 TokenType.DOT, 392 TokenType.MOD, 393 TokenType.SLASH, 394 } 395 396 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 397 398 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 399 400 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 401 402 LAMBDAS = { 403 **parser.Parser.LAMBDAS, 404 TokenType.ARROW: lambda self, expressions: self.expression( 405 exp.Lambda, 406 this=self._replace_lambda( 407 self._parse_assignment(), 408 expressions, 409 ), 410 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 411 ), 412 } 413 414 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 415 if self._prev.token_type != TokenType.WITH: 416 self._retreat(self._index - 1) 417 418 if self._match_text_seq("MASKING", "POLICY"): 419 policy = self._parse_column() 420 return self.expression( 421 exp.MaskingPolicyColumnConstraint, 422 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 423 expressions=self._match(TokenType.USING) 424 and self._parse_wrapped_csv(self._parse_id_var), 425 ) 426 if self._match_text_seq("PROJECTION", "POLICY"): 427 policy = self._parse_column() 428 return self.expression( 429 exp.ProjectionPolicyColumnConstraint, 430 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 431 ) 432 if self._match(TokenType.TAG): 433 return self.expression( 434 exp.TagColumnConstraint, 435 expressions=self._parse_wrapped_csv(self._parse_property), 436 ) 437 438 return None 439 440 def _parse_create(self) -> exp.Create | exp.Command: 441 expression = super()._parse_create() 442 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 443 # Replace the Table node with the enclosed Identifier 444 expression.this.replace(expression.this.this) 445 446 return expression 447 448 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 449 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 450 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 451 this = self._parse_var() or self._parse_type() 452 453 if not this: 454 return None 455 456 self._match(TokenType.COMMA) 457 expression = self._parse_bitwise() 458 this = map_date_part(this) 459 name = this.name.upper() 460 461 if name.startswith("EPOCH"): 462 if name == "EPOCH_MILLISECOND": 463 scale = 10**3 464 elif name == "EPOCH_MICROSECOND": 465 scale = 10**6 466 elif name == "EPOCH_NANOSECOND": 467 scale = 10**9 468 else: 469 scale = None 470 471 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 472 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 473 474 if scale: 475 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 476 477 return to_unix 478 479 return self.expression(exp.Extract, this=this, expression=expression) 480 481 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 482 if is_map: 483 # Keys are strings in Snowflake's objects, see also: 484 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 485 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 486 return self._parse_slice(self._parse_string()) 487 488 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 489 490 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 491 lateral = super()._parse_lateral() 492 if not lateral: 493 return lateral 494 495 if isinstance(lateral.this, exp.Explode): 496 table_alias = lateral.args.get("alias") 497 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 498 if table_alias and not table_alias.args.get("columns"): 499 table_alias.set("columns", columns) 500 elif not table_alias: 501 exp.alias_(lateral, "_flattened", table=columns, copy=False) 502 503 return lateral 504 505 def _parse_at_before(self, table: exp.Table) -> exp.Table: 506 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 507 index = self._index 508 if self._match_texts(("AT", "BEFORE")): 509 this = self._prev.text.upper() 510 kind = ( 511 self._match(TokenType.L_PAREN) 512 and self._match_texts(self.HISTORICAL_DATA_KIND) 513 and self._prev.text.upper() 514 ) 515 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 516 517 if expression: 518 self._match_r_paren() 519 when = self.expression( 520 exp.HistoricalData, this=this, kind=kind, expression=expression 521 ) 522 table.set("when", when) 523 else: 524 self._retreat(index) 525 526 return table 527 528 def _parse_table_parts( 529 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 530 ) -> exp.Table: 531 # https://docs.snowflake.com/en/user-guide/querying-stage 532 if self._match(TokenType.STRING, advance=False): 533 table = self._parse_string() 534 elif self._match_text_seq("@", advance=False): 535 table = self._parse_location_path() 536 else: 537 table = None 538 539 if table: 540 file_format = None 541 pattern = None 542 543 wrapped = self._match(TokenType.L_PAREN) 544 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 545 if self._match_text_seq("FILE_FORMAT", "=>"): 546 file_format = self._parse_string() or super()._parse_table_parts( 547 is_db_reference=is_db_reference 548 ) 549 elif self._match_text_seq("PATTERN", "=>"): 550 pattern = self._parse_string() 551 else: 552 break 553 554 self._match(TokenType.COMMA) 555 556 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 557 else: 558 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 559 560 return self._parse_at_before(table) 561 562 def _parse_id_var( 563 self, 564 any_token: bool = True, 565 tokens: t.Optional[t.Collection[TokenType]] = None, 566 ) -> t.Optional[exp.Expression]: 567 if self._match_text_seq("IDENTIFIER", "("): 568 identifier = ( 569 super()._parse_id_var(any_token=any_token, tokens=tokens) 570 or self._parse_string() 571 ) 572 self._match_r_paren() 573 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 574 575 return super()._parse_id_var(any_token=any_token, tokens=tokens) 576 577 def _parse_show_snowflake(self, this: str) -> exp.Show: 578 scope = None 579 scope_kind = None 580 581 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 582 # which is syntactically valid but has no effect on the output 583 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 584 585 history = self._match_text_seq("HISTORY") 586 587 like = self._parse_string() if self._match(TokenType.LIKE) else None 588 589 if self._match(TokenType.IN): 590 if self._match_text_seq("ACCOUNT"): 591 scope_kind = "ACCOUNT" 592 elif self._match_set(self.DB_CREATABLES): 593 scope_kind = self._prev.text.upper() 594 if self._curr: 595 scope = self._parse_table_parts() 596 elif self._curr: 597 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 598 scope = self._parse_table_parts() 599 600 return self.expression( 601 exp.Show, 602 **{ 603 "terse": terse, 604 "this": this, 605 "history": history, 606 "like": like, 607 "scope": scope, 608 "scope_kind": scope_kind, 609 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 610 "limit": self._parse_limit(), 611 "from": self._parse_string() if self._match(TokenType.FROM) else None, 612 }, 613 ) 614 615 def _parse_alter_table_swap(self) -> exp.SwapTable: 616 self._match_text_seq("WITH") 617 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 618 619 def _parse_location_property(self) -> exp.LocationProperty: 620 self._match(TokenType.EQ) 621 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 622 623 def _parse_file_location(self) -> t.Optional[exp.Expression]: 624 # Parse either a subquery or a staged file 625 return ( 626 self._parse_select(table=True, parse_subquery_alias=False) 627 if self._match(TokenType.L_PAREN, advance=False) 628 else self._parse_table_parts() 629 ) 630 631 def _parse_location_path(self) -> exp.Var: 632 parts = [self._advance_any(ignore_reserved=True)] 633 634 # We avoid consuming a comma token because external tables like @foo and @bar 635 # can be joined in a query with a comma separator, as well as closing paren 636 # in case of subqueries 637 while self._is_connected() and not self._match_set( 638 (TokenType.COMMA, TokenType.R_PAREN), advance=False 639 ): 640 parts.append(self._advance_any(ignore_reserved=True)) 641 642 return exp.var("".join(part.text for part in parts if part)) 643 644 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 645 this = super()._parse_lambda_arg() 646 647 if not this: 648 return this 649 650 typ = self._parse_types() 651 652 if typ: 653 return self.expression(exp.Cast, this=this, to=typ) 654 655 return this 656 657 class Tokenizer(tokens.Tokenizer): 658 STRING_ESCAPES = ["\\", "'"] 659 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 660 RAW_STRINGS = ["$$"] 661 COMMENTS = ["--", "//", ("/*", "*/")] 662 663 KEYWORDS = { 664 **tokens.Tokenizer.KEYWORDS, 665 "BYTEINT": TokenType.INT, 666 "CHAR VARYING": TokenType.VARCHAR, 667 "CHARACTER VARYING": TokenType.VARCHAR, 668 "EXCLUDE": TokenType.EXCEPT, 669 "ILIKE ANY": TokenType.ILIKE_ANY, 670 "LIKE ANY": TokenType.LIKE_ANY, 671 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 672 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 673 "MINUS": TokenType.EXCEPT, 674 "NCHAR VARYING": TokenType.VARCHAR, 675 "PUT": TokenType.COMMAND, 676 "REMOVE": TokenType.COMMAND, 677 "RM": TokenType.COMMAND, 678 "SAMPLE": TokenType.TABLE_SAMPLE, 679 "SQL_DOUBLE": TokenType.DOUBLE, 680 "SQL_VARCHAR": TokenType.VARCHAR, 681 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 682 "TAG": TokenType.TAG, 683 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 684 "TOP": TokenType.TOP, 685 "WAREHOUSE": TokenType.WAREHOUSE, 686 "STREAMLIT": TokenType.STREAMLIT, 687 } 688 KEYWORDS.pop("/*+") 689 690 SINGLE_TOKENS = { 691 **tokens.Tokenizer.SINGLE_TOKENS, 692 "$": TokenType.PARAMETER, 693 } 694 695 VAR_SINGLE_TOKENS = {"$"} 696 697 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 698 699 class Generator(generator.Generator): 700 PARAMETER_TOKEN = "$" 701 MATCHED_BY_SOURCE = False 702 SINGLE_STRING_INTERVAL = True 703 JOIN_HINTS = False 704 TABLE_HINTS = False 705 QUERY_HINTS = False 706 AGGREGATE_FILTER_SUPPORTED = False 707 SUPPORTS_TABLE_COPY = False 708 COLLATE_IS_FUNC = True 709 LIMIT_ONLY_LITERALS = True 710 JSON_KEY_VALUE_PAIR_SEP = "," 711 INSERT_OVERWRITE = " OVERWRITE INTO" 712 STRUCT_DELIMITER = ("(", ")") 713 COPY_PARAMS_ARE_WRAPPED = False 714 COPY_PARAMS_EQ_REQUIRED = True 715 STAR_EXCEPT = "EXCLUDE" 716 717 TRANSFORMS = { 718 **generator.Generator.TRANSFORMS, 719 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 720 exp.ArgMax: rename_func("MAX_BY"), 721 exp.ArgMin: rename_func("MIN_BY"), 722 exp.Array: inline_array_sql, 723 exp.ArrayConcat: rename_func("ARRAY_CAT"), 724 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 725 exp.AtTimeZone: lambda self, e: self.func( 726 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 727 ), 728 exp.BitwiseXor: rename_func("BITXOR"), 729 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 730 exp.DateAdd: date_delta_sql("DATEADD"), 731 exp.DateDiff: date_delta_sql("DATEDIFF"), 732 exp.DateStrToDate: datestrtodate_sql, 733 exp.DayOfMonth: rename_func("DAYOFMONTH"), 734 exp.DayOfWeek: rename_func("DAYOFWEEK"), 735 exp.DayOfYear: rename_func("DAYOFYEAR"), 736 exp.Explode: rename_func("FLATTEN"), 737 exp.Extract: rename_func("DATE_PART"), 738 exp.FromTimeZone: lambda self, e: self.func( 739 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 740 ), 741 exp.GenerateSeries: lambda self, e: self.func( 742 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 743 ), 744 exp.GroupConcat: rename_func("LISTAGG"), 745 exp.If: if_sql(name="IFF", false_value="NULL"), 746 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 747 exp.JSONExtractScalar: lambda self, e: self.func( 748 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 749 ), 750 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 751 exp.JSONPathRoot: lambda *_: "", 752 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 753 exp.LogicalOr: rename_func("BOOLOR_AGG"), 754 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 755 exp.Max: max_or_greatest, 756 exp.Min: min_or_least, 757 exp.ParseJSON: lambda self, e: self.func( 758 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 759 ), 760 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 761 exp.PercentileCont: transforms.preprocess( 762 [transforms.add_within_group_for_percentiles] 763 ), 764 exp.PercentileDisc: transforms.preprocess( 765 [transforms.add_within_group_for_percentiles] 766 ), 767 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 768 exp.RegexpILike: _regexpilike_sql, 769 exp.Rand: rename_func("RANDOM"), 770 exp.Select: transforms.preprocess( 771 [ 772 transforms.eliminate_distinct_on, 773 transforms.explode_to_unnest(), 774 transforms.eliminate_semi_and_anti_joins, 775 ] 776 ), 777 exp.SHA: rename_func("SHA1"), 778 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 779 exp.StartsWith: rename_func("STARTSWITH"), 780 exp.StrPosition: lambda self, e: self.func( 781 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 782 ), 783 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 784 exp.Stuff: rename_func("INSERT"), 785 exp.TimeAdd: date_delta_sql("TIMEADD"), 786 exp.TimestampDiff: lambda self, e: self.func( 787 "TIMESTAMPDIFF", e.unit, e.expression, e.this 788 ), 789 exp.TimestampTrunc: timestamptrunc_sql(), 790 exp.TimeStrToTime: timestrtotime_sql, 791 exp.TimeToStr: lambda self, e: self.func( 792 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 793 ), 794 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 795 exp.ToArray: rename_func("TO_ARRAY"), 796 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 797 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 798 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 799 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 800 exp.TsOrDsToDate: lambda self, e: self.func( 801 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 802 ), 803 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 804 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 805 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 806 exp.Xor: rename_func("BOOLXOR"), 807 } 808 809 SUPPORTED_JSON_PATH_PARTS = { 810 exp.JSONPathKey, 811 exp.JSONPathRoot, 812 exp.JSONPathSubscript, 813 } 814 815 TYPE_MAPPING = { 816 **generator.Generator.TYPE_MAPPING, 817 exp.DataType.Type.NESTED: "OBJECT", 818 exp.DataType.Type.STRUCT: "OBJECT", 819 } 820 821 PROPERTIES_LOCATION = { 822 **generator.Generator.PROPERTIES_LOCATION, 823 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 824 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 825 } 826 827 UNSUPPORTED_VALUES_EXPRESSIONS = { 828 exp.Map, 829 exp.StarMap, 830 exp.Struct, 831 exp.VarMap, 832 } 833 834 def with_properties(self, properties: exp.Properties) -> str: 835 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 836 837 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 838 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 839 values_as_table = False 840 841 return super().values_sql(expression, values_as_table=values_as_table) 842 843 def datatype_sql(self, expression: exp.DataType) -> str: 844 expressions = expression.expressions 845 if ( 846 expressions 847 and expression.is_type(*exp.DataType.STRUCT_TYPES) 848 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 849 ): 850 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 851 return "OBJECT" 852 853 return super().datatype_sql(expression) 854 855 def tonumber_sql(self, expression: exp.ToNumber) -> str: 856 return self.func( 857 "TO_NUMBER", 858 expression.this, 859 expression.args.get("format"), 860 expression.args.get("precision"), 861 expression.args.get("scale"), 862 ) 863 864 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 865 milli = expression.args.get("milli") 866 if milli is not None: 867 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 868 expression.set("nano", milli_to_nano) 869 870 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 871 872 def trycast_sql(self, expression: exp.TryCast) -> str: 873 value = expression.this 874 875 if value.type is None: 876 from sqlglot.optimizer.annotate_types import annotate_types 877 878 value = annotate_types(value) 879 880 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 881 return super().trycast_sql(expression) 882 883 # TRY_CAST only works for string values in Snowflake 884 return self.cast_sql(expression) 885 886 def log_sql(self, expression: exp.Log) -> str: 887 if not expression.expression: 888 return self.func("LN", expression.this) 889 890 return super().log_sql(expression) 891 892 def unnest_sql(self, expression: exp.Unnest) -> str: 893 unnest_alias = expression.args.get("alias") 894 offset = expression.args.get("offset") 895 896 columns = [ 897 exp.to_identifier("seq"), 898 exp.to_identifier("key"), 899 exp.to_identifier("path"), 900 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 901 seq_get(unnest_alias.columns if unnest_alias else [], 0) 902 or exp.to_identifier("value"), 903 exp.to_identifier("this"), 904 ] 905 906 if unnest_alias: 907 unnest_alias.set("columns", columns) 908 else: 909 unnest_alias = exp.TableAlias(this="_u", columns=columns) 910 911 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 912 alias = self.sql(unnest_alias) 913 alias = f" AS {alias}" if alias else "" 914 return f"{explode}{alias}" 915 916 def show_sql(self, expression: exp.Show) -> str: 917 terse = "TERSE " if expression.args.get("terse") else "" 918 history = " HISTORY" if expression.args.get("history") else "" 919 like = self.sql(expression, "like") 920 like = f" LIKE {like}" if like else "" 921 922 scope = self.sql(expression, "scope") 923 scope = f" {scope}" if scope else "" 924 925 scope_kind = self.sql(expression, "scope_kind") 926 if scope_kind: 927 scope_kind = f" IN {scope_kind}" 928 929 starts_with = self.sql(expression, "starts_with") 930 if starts_with: 931 starts_with = f" STARTS WITH {starts_with}" 932 933 limit = self.sql(expression, "limit") 934 935 from_ = self.sql(expression, "from") 936 if from_: 937 from_ = f" FROM {from_}" 938 939 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 940 941 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 942 # Other dialects don't support all of the following parameters, so we need to 943 # generate default values as necessary to ensure the transpilation is correct 944 group = expression.args.get("group") 945 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 946 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 947 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 948 949 return self.func( 950 "REGEXP_SUBSTR", 951 expression.this, 952 expression.expression, 953 position, 954 occurrence, 955 parameters, 956 group, 957 ) 958 959 def except_op(self, expression: exp.Except) -> str: 960 if not expression.args.get("distinct"): 961 self.unsupported("EXCEPT with All is not supported in Snowflake") 962 return super().except_op(expression) 963 964 def intersect_op(self, expression: exp.Intersect) -> str: 965 if not expression.args.get("distinct"): 966 self.unsupported("INTERSECT with All is not supported in Snowflake") 967 return super().intersect_op(expression) 968 969 def describe_sql(self, expression: exp.Describe) -> str: 970 # Default to table if kind is unknown 971 kind_value = expression.args.get("kind") or "TABLE" 972 kind = f" {kind_value}" if kind_value else "" 973 this = f" {self.sql(expression, 'this')}" 974 expressions = self.expressions(expression, flat=True) 975 expressions = f" {expressions}" if expressions else "" 976 return f"DESCRIBE{kind}{this}{expressions}" 977 978 def generatedasidentitycolumnconstraint_sql( 979 self, expression: exp.GeneratedAsIdentityColumnConstraint 980 ) -> str: 981 start = expression.args.get("start") 982 start = f" START {start}" if start else "" 983 increment = expression.args.get("increment") 984 increment = f" INCREMENT {increment}" if increment else "" 985 return f"AUTOINCREMENT{start}{increment}" 986 987 def swaptable_sql(self, expression: exp.SwapTable) -> str: 988 this = self.sql(expression, "this") 989 return f"SWAP WITH {this}" 990 991 def cluster_sql(self, expression: exp.Cluster) -> str: 992 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 993 994 def struct_sql(self, expression: exp.Struct) -> str: 995 keys = [] 996 values = [] 997 998 for i, e in enumerate(expression.expressions): 999 if isinstance(e, exp.PropertyEQ): 1000 keys.append( 1001 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1002 ) 1003 values.append(e.expression) 1004 else: 1005 keys.append(exp.Literal.string(f"_{i}")) 1006 values.append(e) 1007 1008 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1009 1010 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1011 if expression.args.get("weight") or expression.args.get("accuracy"): 1012 self.unsupported( 1013 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1014 ) 1015 1016 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1017 1018 def alterset_sql(self, expression: exp.AlterSet) -> str: 1019 exprs = self.expressions(expression, flat=True) 1020 exprs = f" {exprs}" if exprs else "" 1021 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1022 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1023 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1024 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1025 tag = self.expressions(expression, key="tag", flat=True) 1026 tag = f" TAG {tag}" if tag else "" 1027 1028 return f"SET{exprs}{file_format}{copy_options}{tag}"
190class Snowflake(Dialect): 191 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 192 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 193 NULL_ORDERING = "nulls_are_large" 194 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 195 SUPPORTS_USER_DEFINED_TYPES = False 196 SUPPORTS_SEMI_ANTI_JOIN = False 197 PREFER_CTE_ALIAS_COLUMN = True 198 TABLESAMPLE_SIZE_IS_PERCENT = True 199 COPY_PARAMS_ARE_CSV = False 200 201 TIME_MAPPING = { 202 "YYYY": "%Y", 203 "yyyy": "%Y", 204 "YY": "%y", 205 "yy": "%y", 206 "MMMM": "%B", 207 "mmmm": "%B", 208 "MON": "%b", 209 "mon": "%b", 210 "MM": "%m", 211 "mm": "%m", 212 "DD": "%d", 213 "dd": "%-d", 214 "DY": "%a", 215 "dy": "%w", 216 "HH24": "%H", 217 "hh24": "%H", 218 "HH12": "%I", 219 "hh12": "%I", 220 "MI": "%M", 221 "mi": "%M", 222 "SS": "%S", 223 "ss": "%S", 224 "FF": "%f", 225 "ff": "%f", 226 "FF6": "%f", 227 "ff6": "%f", 228 } 229 230 def quote_identifier(self, expression: E, identify: bool = True) -> E: 231 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 232 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 233 if ( 234 isinstance(expression, exp.Identifier) 235 and isinstance(expression.parent, exp.Table) 236 and expression.name.lower() == "dual" 237 ): 238 return expression # type: ignore 239 240 return super().quote_identifier(expression, identify=identify) 241 242 class Parser(parser.Parser): 243 IDENTIFY_PIVOT_STRINGS = True 244 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 245 COLON_IS_JSON_EXTRACT = True 246 247 ID_VAR_TOKENS = { 248 *parser.Parser.ID_VAR_TOKENS, 249 TokenType.MATCH_CONDITION, 250 } 251 252 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 253 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 254 255 FUNCTIONS = { 256 **parser.Parser.FUNCTIONS, 257 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 258 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 259 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 260 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 261 this=seq_get(args, 1), expression=seq_get(args, 0) 262 ), 263 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 264 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 265 start=seq_get(args, 0), 266 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 267 step=seq_get(args, 2), 268 ), 269 "BITXOR": binary_from_function(exp.BitwiseXor), 270 "BIT_XOR": binary_from_function(exp.BitwiseXor), 271 "BOOLXOR": binary_from_function(exp.Xor), 272 "CONVERT_TIMEZONE": _build_convert_timezone, 273 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 274 "DATE_TRUNC": _date_trunc_to_time, 275 "DATEADD": _build_date_time_add(exp.DateAdd), 276 "DATEDIFF": _build_datediff, 277 "DIV0": _build_if_from_div0, 278 "FLATTEN": exp.Explode.from_arg_list, 279 "GET_PATH": lambda args, dialect: exp.JSONExtract( 280 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 281 ), 282 "IFF": exp.If.from_arg_list, 283 "LAST_DAY": lambda args: exp.LastDay( 284 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 285 ), 286 "LISTAGG": exp.GroupConcat.from_arg_list, 287 "MEDIAN": lambda args: exp.PercentileCont( 288 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 289 ), 290 "NULLIFZERO": _build_if_from_nullifzero, 291 "OBJECT_CONSTRUCT": _build_object_construct, 292 "REGEXP_REPLACE": _build_regexp_replace, 293 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 294 "RLIKE": exp.RegexpLike.from_arg_list, 295 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 296 "TIMEADD": _build_date_time_add(exp.TimeAdd), 297 "TIMEDIFF": _build_datediff, 298 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 299 "TIMESTAMPDIFF": _build_datediff, 300 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 301 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 302 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 303 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 304 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 305 "TO_NUMBER": lambda args: exp.ToNumber( 306 this=seq_get(args, 0), 307 format=seq_get(args, 1), 308 precision=seq_get(args, 2), 309 scale=seq_get(args, 3), 310 ), 311 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 312 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 313 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 314 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 315 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 316 "TO_VARCHAR": exp.ToChar.from_arg_list, 317 "ZEROIFNULL": _build_if_from_zeroifnull, 318 } 319 320 FUNCTION_PARSERS = { 321 **parser.Parser.FUNCTION_PARSERS, 322 "DATE_PART": lambda self: self._parse_date_part(), 323 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 324 } 325 FUNCTION_PARSERS.pop("TRIM") 326 327 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 328 329 RANGE_PARSERS = { 330 **parser.Parser.RANGE_PARSERS, 331 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 332 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 333 } 334 335 ALTER_PARSERS = { 336 **parser.Parser.ALTER_PARSERS, 337 "UNSET": lambda self: self.expression( 338 exp.Set, 339 tag=self._match_text_seq("TAG"), 340 expressions=self._parse_csv(self._parse_id_var), 341 unset=True, 342 ), 343 "SWAP": lambda self: self._parse_alter_table_swap(), 344 } 345 346 STATEMENT_PARSERS = { 347 **parser.Parser.STATEMENT_PARSERS, 348 TokenType.SHOW: lambda self: self._parse_show(), 349 } 350 351 PROPERTY_PARSERS = { 352 **parser.Parser.PROPERTY_PARSERS, 353 "LOCATION": lambda self: self._parse_location_property(), 354 } 355 356 TYPE_CONVERTERS = { 357 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 358 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 359 } 360 361 SHOW_PARSERS = { 362 "SCHEMAS": _show_parser("SCHEMAS"), 363 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 364 "OBJECTS": _show_parser("OBJECTS"), 365 "TERSE OBJECTS": _show_parser("OBJECTS"), 366 "TABLES": _show_parser("TABLES"), 367 "TERSE TABLES": _show_parser("TABLES"), 368 "VIEWS": _show_parser("VIEWS"), 369 "TERSE VIEWS": _show_parser("VIEWS"), 370 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 371 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 372 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 373 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 374 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 375 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 376 "SEQUENCES": _show_parser("SEQUENCES"), 377 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 378 "COLUMNS": _show_parser("COLUMNS"), 379 "USERS": _show_parser("USERS"), 380 "TERSE USERS": _show_parser("USERS"), 381 } 382 383 CONSTRAINT_PARSERS = { 384 **parser.Parser.CONSTRAINT_PARSERS, 385 "WITH": lambda self: self._parse_with_constraint(), 386 "MASKING": lambda self: self._parse_with_constraint(), 387 "PROJECTION": lambda self: self._parse_with_constraint(), 388 "TAG": lambda self: self._parse_with_constraint(), 389 } 390 391 STAGED_FILE_SINGLE_TOKENS = { 392 TokenType.DOT, 393 TokenType.MOD, 394 TokenType.SLASH, 395 } 396 397 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 398 399 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 400 401 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 402 403 LAMBDAS = { 404 **parser.Parser.LAMBDAS, 405 TokenType.ARROW: lambda self, expressions: self.expression( 406 exp.Lambda, 407 this=self._replace_lambda( 408 self._parse_assignment(), 409 expressions, 410 ), 411 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 412 ), 413 } 414 415 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 416 if self._prev.token_type != TokenType.WITH: 417 self._retreat(self._index - 1) 418 419 if self._match_text_seq("MASKING", "POLICY"): 420 policy = self._parse_column() 421 return self.expression( 422 exp.MaskingPolicyColumnConstraint, 423 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 424 expressions=self._match(TokenType.USING) 425 and self._parse_wrapped_csv(self._parse_id_var), 426 ) 427 if self._match_text_seq("PROJECTION", "POLICY"): 428 policy = self._parse_column() 429 return self.expression( 430 exp.ProjectionPolicyColumnConstraint, 431 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 432 ) 433 if self._match(TokenType.TAG): 434 return self.expression( 435 exp.TagColumnConstraint, 436 expressions=self._parse_wrapped_csv(self._parse_property), 437 ) 438 439 return None 440 441 def _parse_create(self) -> exp.Create | exp.Command: 442 expression = super()._parse_create() 443 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 444 # Replace the Table node with the enclosed Identifier 445 expression.this.replace(expression.this.this) 446 447 return expression 448 449 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 450 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 451 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 452 this = self._parse_var() or self._parse_type() 453 454 if not this: 455 return None 456 457 self._match(TokenType.COMMA) 458 expression = self._parse_bitwise() 459 this = map_date_part(this) 460 name = this.name.upper() 461 462 if name.startswith("EPOCH"): 463 if name == "EPOCH_MILLISECOND": 464 scale = 10**3 465 elif name == "EPOCH_MICROSECOND": 466 scale = 10**6 467 elif name == "EPOCH_NANOSECOND": 468 scale = 10**9 469 else: 470 scale = None 471 472 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 473 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 474 475 if scale: 476 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 477 478 return to_unix 479 480 return self.expression(exp.Extract, this=this, expression=expression) 481 482 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 483 if is_map: 484 # Keys are strings in Snowflake's objects, see also: 485 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 486 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 487 return self._parse_slice(self._parse_string()) 488 489 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 490 491 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 492 lateral = super()._parse_lateral() 493 if not lateral: 494 return lateral 495 496 if isinstance(lateral.this, exp.Explode): 497 table_alias = lateral.args.get("alias") 498 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 499 if table_alias and not table_alias.args.get("columns"): 500 table_alias.set("columns", columns) 501 elif not table_alias: 502 exp.alias_(lateral, "_flattened", table=columns, copy=False) 503 504 return lateral 505 506 def _parse_at_before(self, table: exp.Table) -> exp.Table: 507 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 508 index = self._index 509 if self._match_texts(("AT", "BEFORE")): 510 this = self._prev.text.upper() 511 kind = ( 512 self._match(TokenType.L_PAREN) 513 and self._match_texts(self.HISTORICAL_DATA_KIND) 514 and self._prev.text.upper() 515 ) 516 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 517 518 if expression: 519 self._match_r_paren() 520 when = self.expression( 521 exp.HistoricalData, this=this, kind=kind, expression=expression 522 ) 523 table.set("when", when) 524 else: 525 self._retreat(index) 526 527 return table 528 529 def _parse_table_parts( 530 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 531 ) -> exp.Table: 532 # https://docs.snowflake.com/en/user-guide/querying-stage 533 if self._match(TokenType.STRING, advance=False): 534 table = self._parse_string() 535 elif self._match_text_seq("@", advance=False): 536 table = self._parse_location_path() 537 else: 538 table = None 539 540 if table: 541 file_format = None 542 pattern = None 543 544 wrapped = self._match(TokenType.L_PAREN) 545 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 546 if self._match_text_seq("FILE_FORMAT", "=>"): 547 file_format = self._parse_string() or super()._parse_table_parts( 548 is_db_reference=is_db_reference 549 ) 550 elif self._match_text_seq("PATTERN", "=>"): 551 pattern = self._parse_string() 552 else: 553 break 554 555 self._match(TokenType.COMMA) 556 557 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 558 else: 559 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 560 561 return self._parse_at_before(table) 562 563 def _parse_id_var( 564 self, 565 any_token: bool = True, 566 tokens: t.Optional[t.Collection[TokenType]] = None, 567 ) -> t.Optional[exp.Expression]: 568 if self._match_text_seq("IDENTIFIER", "("): 569 identifier = ( 570 super()._parse_id_var(any_token=any_token, tokens=tokens) 571 or self._parse_string() 572 ) 573 self._match_r_paren() 574 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 575 576 return super()._parse_id_var(any_token=any_token, tokens=tokens) 577 578 def _parse_show_snowflake(self, this: str) -> exp.Show: 579 scope = None 580 scope_kind = None 581 582 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 583 # which is syntactically valid but has no effect on the output 584 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 585 586 history = self._match_text_seq("HISTORY") 587 588 like = self._parse_string() if self._match(TokenType.LIKE) else None 589 590 if self._match(TokenType.IN): 591 if self._match_text_seq("ACCOUNT"): 592 scope_kind = "ACCOUNT" 593 elif self._match_set(self.DB_CREATABLES): 594 scope_kind = self._prev.text.upper() 595 if self._curr: 596 scope = self._parse_table_parts() 597 elif self._curr: 598 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 599 scope = self._parse_table_parts() 600 601 return self.expression( 602 exp.Show, 603 **{ 604 "terse": terse, 605 "this": this, 606 "history": history, 607 "like": like, 608 "scope": scope, 609 "scope_kind": scope_kind, 610 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 611 "limit": self._parse_limit(), 612 "from": self._parse_string() if self._match(TokenType.FROM) else None, 613 }, 614 ) 615 616 def _parse_alter_table_swap(self) -> exp.SwapTable: 617 self._match_text_seq("WITH") 618 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 619 620 def _parse_location_property(self) -> exp.LocationProperty: 621 self._match(TokenType.EQ) 622 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 623 624 def _parse_file_location(self) -> t.Optional[exp.Expression]: 625 # Parse either a subquery or a staged file 626 return ( 627 self._parse_select(table=True, parse_subquery_alias=False) 628 if self._match(TokenType.L_PAREN, advance=False) 629 else self._parse_table_parts() 630 ) 631 632 def _parse_location_path(self) -> exp.Var: 633 parts = [self._advance_any(ignore_reserved=True)] 634 635 # We avoid consuming a comma token because external tables like @foo and @bar 636 # can be joined in a query with a comma separator, as well as closing paren 637 # in case of subqueries 638 while self._is_connected() and not self._match_set( 639 (TokenType.COMMA, TokenType.R_PAREN), advance=False 640 ): 641 parts.append(self._advance_any(ignore_reserved=True)) 642 643 return exp.var("".join(part.text for part in parts if part)) 644 645 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 646 this = super()._parse_lambda_arg() 647 648 if not this: 649 return this 650 651 typ = self._parse_types() 652 653 if typ: 654 return self.expression(exp.Cast, this=this, to=typ) 655 656 return this 657 658 class Tokenizer(tokens.Tokenizer): 659 STRING_ESCAPES = ["\\", "'"] 660 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 661 RAW_STRINGS = ["$$"] 662 COMMENTS = ["--", "//", ("/*", "*/")] 663 664 KEYWORDS = { 665 **tokens.Tokenizer.KEYWORDS, 666 "BYTEINT": TokenType.INT, 667 "CHAR VARYING": TokenType.VARCHAR, 668 "CHARACTER VARYING": TokenType.VARCHAR, 669 "EXCLUDE": TokenType.EXCEPT, 670 "ILIKE ANY": TokenType.ILIKE_ANY, 671 "LIKE ANY": TokenType.LIKE_ANY, 672 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 673 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 674 "MINUS": TokenType.EXCEPT, 675 "NCHAR VARYING": TokenType.VARCHAR, 676 "PUT": TokenType.COMMAND, 677 "REMOVE": TokenType.COMMAND, 678 "RM": TokenType.COMMAND, 679 "SAMPLE": TokenType.TABLE_SAMPLE, 680 "SQL_DOUBLE": TokenType.DOUBLE, 681 "SQL_VARCHAR": TokenType.VARCHAR, 682 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 683 "TAG": TokenType.TAG, 684 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 685 "TOP": TokenType.TOP, 686 "WAREHOUSE": TokenType.WAREHOUSE, 687 "STREAMLIT": TokenType.STREAMLIT, 688 } 689 KEYWORDS.pop("/*+") 690 691 SINGLE_TOKENS = { 692 **tokens.Tokenizer.SINGLE_TOKENS, 693 "$": TokenType.PARAMETER, 694 } 695 696 VAR_SINGLE_TOKENS = {"$"} 697 698 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 699 700 class Generator(generator.Generator): 701 PARAMETER_TOKEN = "$" 702 MATCHED_BY_SOURCE = False 703 SINGLE_STRING_INTERVAL = True 704 JOIN_HINTS = False 705 TABLE_HINTS = False 706 QUERY_HINTS = False 707 AGGREGATE_FILTER_SUPPORTED = False 708 SUPPORTS_TABLE_COPY = False 709 COLLATE_IS_FUNC = True 710 LIMIT_ONLY_LITERALS = True 711 JSON_KEY_VALUE_PAIR_SEP = "," 712 INSERT_OVERWRITE = " OVERWRITE INTO" 713 STRUCT_DELIMITER = ("(", ")") 714 COPY_PARAMS_ARE_WRAPPED = False 715 COPY_PARAMS_EQ_REQUIRED = True 716 STAR_EXCEPT = "EXCLUDE" 717 718 TRANSFORMS = { 719 **generator.Generator.TRANSFORMS, 720 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 721 exp.ArgMax: rename_func("MAX_BY"), 722 exp.ArgMin: rename_func("MIN_BY"), 723 exp.Array: inline_array_sql, 724 exp.ArrayConcat: rename_func("ARRAY_CAT"), 725 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 726 exp.AtTimeZone: lambda self, e: self.func( 727 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 728 ), 729 exp.BitwiseXor: rename_func("BITXOR"), 730 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 731 exp.DateAdd: date_delta_sql("DATEADD"), 732 exp.DateDiff: date_delta_sql("DATEDIFF"), 733 exp.DateStrToDate: datestrtodate_sql, 734 exp.DayOfMonth: rename_func("DAYOFMONTH"), 735 exp.DayOfWeek: rename_func("DAYOFWEEK"), 736 exp.DayOfYear: rename_func("DAYOFYEAR"), 737 exp.Explode: rename_func("FLATTEN"), 738 exp.Extract: rename_func("DATE_PART"), 739 exp.FromTimeZone: lambda self, e: self.func( 740 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 741 ), 742 exp.GenerateSeries: lambda self, e: self.func( 743 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 744 ), 745 exp.GroupConcat: rename_func("LISTAGG"), 746 exp.If: if_sql(name="IFF", false_value="NULL"), 747 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 748 exp.JSONExtractScalar: lambda self, e: self.func( 749 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 750 ), 751 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 752 exp.JSONPathRoot: lambda *_: "", 753 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 754 exp.LogicalOr: rename_func("BOOLOR_AGG"), 755 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 756 exp.Max: max_or_greatest, 757 exp.Min: min_or_least, 758 exp.ParseJSON: lambda self, e: self.func( 759 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 760 ), 761 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 762 exp.PercentileCont: transforms.preprocess( 763 [transforms.add_within_group_for_percentiles] 764 ), 765 exp.PercentileDisc: transforms.preprocess( 766 [transforms.add_within_group_for_percentiles] 767 ), 768 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 769 exp.RegexpILike: _regexpilike_sql, 770 exp.Rand: rename_func("RANDOM"), 771 exp.Select: transforms.preprocess( 772 [ 773 transforms.eliminate_distinct_on, 774 transforms.explode_to_unnest(), 775 transforms.eliminate_semi_and_anti_joins, 776 ] 777 ), 778 exp.SHA: rename_func("SHA1"), 779 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 780 exp.StartsWith: rename_func("STARTSWITH"), 781 exp.StrPosition: lambda self, e: self.func( 782 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 783 ), 784 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 785 exp.Stuff: rename_func("INSERT"), 786 exp.TimeAdd: date_delta_sql("TIMEADD"), 787 exp.TimestampDiff: lambda self, e: self.func( 788 "TIMESTAMPDIFF", e.unit, e.expression, e.this 789 ), 790 exp.TimestampTrunc: timestamptrunc_sql(), 791 exp.TimeStrToTime: timestrtotime_sql, 792 exp.TimeToStr: lambda self, e: self.func( 793 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 794 ), 795 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 796 exp.ToArray: rename_func("TO_ARRAY"), 797 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 798 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 799 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 800 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 801 exp.TsOrDsToDate: lambda self, e: self.func( 802 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 803 ), 804 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 805 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 806 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 807 exp.Xor: rename_func("BOOLXOR"), 808 } 809 810 SUPPORTED_JSON_PATH_PARTS = { 811 exp.JSONPathKey, 812 exp.JSONPathRoot, 813 exp.JSONPathSubscript, 814 } 815 816 TYPE_MAPPING = { 817 **generator.Generator.TYPE_MAPPING, 818 exp.DataType.Type.NESTED: "OBJECT", 819 exp.DataType.Type.STRUCT: "OBJECT", 820 } 821 822 PROPERTIES_LOCATION = { 823 **generator.Generator.PROPERTIES_LOCATION, 824 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 825 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 826 } 827 828 UNSUPPORTED_VALUES_EXPRESSIONS = { 829 exp.Map, 830 exp.StarMap, 831 exp.Struct, 832 exp.VarMap, 833 } 834 835 def with_properties(self, properties: exp.Properties) -> str: 836 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 837 838 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 839 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 840 values_as_table = False 841 842 return super().values_sql(expression, values_as_table=values_as_table) 843 844 def datatype_sql(self, expression: exp.DataType) -> str: 845 expressions = expression.expressions 846 if ( 847 expressions 848 and expression.is_type(*exp.DataType.STRUCT_TYPES) 849 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 850 ): 851 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 852 return "OBJECT" 853 854 return super().datatype_sql(expression) 855 856 def tonumber_sql(self, expression: exp.ToNumber) -> str: 857 return self.func( 858 "TO_NUMBER", 859 expression.this, 860 expression.args.get("format"), 861 expression.args.get("precision"), 862 expression.args.get("scale"), 863 ) 864 865 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 866 milli = expression.args.get("milli") 867 if milli is not None: 868 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 869 expression.set("nano", milli_to_nano) 870 871 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 872 873 def trycast_sql(self, expression: exp.TryCast) -> str: 874 value = expression.this 875 876 if value.type is None: 877 from sqlglot.optimizer.annotate_types import annotate_types 878 879 value = annotate_types(value) 880 881 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 882 return super().trycast_sql(expression) 883 884 # TRY_CAST only works for string values in Snowflake 885 return self.cast_sql(expression) 886 887 def log_sql(self, expression: exp.Log) -> str: 888 if not expression.expression: 889 return self.func("LN", expression.this) 890 891 return super().log_sql(expression) 892 893 def unnest_sql(self, expression: exp.Unnest) -> str: 894 unnest_alias = expression.args.get("alias") 895 offset = expression.args.get("offset") 896 897 columns = [ 898 exp.to_identifier("seq"), 899 exp.to_identifier("key"), 900 exp.to_identifier("path"), 901 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 902 seq_get(unnest_alias.columns if unnest_alias else [], 0) 903 or exp.to_identifier("value"), 904 exp.to_identifier("this"), 905 ] 906 907 if unnest_alias: 908 unnest_alias.set("columns", columns) 909 else: 910 unnest_alias = exp.TableAlias(this="_u", columns=columns) 911 912 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 913 alias = self.sql(unnest_alias) 914 alias = f" AS {alias}" if alias else "" 915 return f"{explode}{alias}" 916 917 def show_sql(self, expression: exp.Show) -> str: 918 terse = "TERSE " if expression.args.get("terse") else "" 919 history = " HISTORY" if expression.args.get("history") else "" 920 like = self.sql(expression, "like") 921 like = f" LIKE {like}" if like else "" 922 923 scope = self.sql(expression, "scope") 924 scope = f" {scope}" if scope else "" 925 926 scope_kind = self.sql(expression, "scope_kind") 927 if scope_kind: 928 scope_kind = f" IN {scope_kind}" 929 930 starts_with = self.sql(expression, "starts_with") 931 if starts_with: 932 starts_with = f" STARTS WITH {starts_with}" 933 934 limit = self.sql(expression, "limit") 935 936 from_ = self.sql(expression, "from") 937 if from_: 938 from_ = f" FROM {from_}" 939 940 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 941 942 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 943 # Other dialects don't support all of the following parameters, so we need to 944 # generate default values as necessary to ensure the transpilation is correct 945 group = expression.args.get("group") 946 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 947 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 948 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 949 950 return self.func( 951 "REGEXP_SUBSTR", 952 expression.this, 953 expression.expression, 954 position, 955 occurrence, 956 parameters, 957 group, 958 ) 959 960 def except_op(self, expression: exp.Except) -> str: 961 if not expression.args.get("distinct"): 962 self.unsupported("EXCEPT with All is not supported in Snowflake") 963 return super().except_op(expression) 964 965 def intersect_op(self, expression: exp.Intersect) -> str: 966 if not expression.args.get("distinct"): 967 self.unsupported("INTERSECT with All is not supported in Snowflake") 968 return super().intersect_op(expression) 969 970 def describe_sql(self, expression: exp.Describe) -> str: 971 # Default to table if kind is unknown 972 kind_value = expression.args.get("kind") or "TABLE" 973 kind = f" {kind_value}" if kind_value else "" 974 this = f" {self.sql(expression, 'this')}" 975 expressions = self.expressions(expression, flat=True) 976 expressions = f" {expressions}" if expressions else "" 977 return f"DESCRIBE{kind}{this}{expressions}" 978 979 def generatedasidentitycolumnconstraint_sql( 980 self, expression: exp.GeneratedAsIdentityColumnConstraint 981 ) -> str: 982 start = expression.args.get("start") 983 start = f" START {start}" if start else "" 984 increment = expression.args.get("increment") 985 increment = f" INCREMENT {increment}" if increment else "" 986 return f"AUTOINCREMENT{start}{increment}" 987 988 def swaptable_sql(self, expression: exp.SwapTable) -> str: 989 this = self.sql(expression, "this") 990 return f"SWAP WITH {this}" 991 992 def cluster_sql(self, expression: exp.Cluster) -> str: 993 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 994 995 def struct_sql(self, expression: exp.Struct) -> str: 996 keys = [] 997 values = [] 998 999 for i, e in enumerate(expression.expressions): 1000 if isinstance(e, exp.PropertyEQ): 1001 keys.append( 1002 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1003 ) 1004 values.append(e.expression) 1005 else: 1006 keys.append(exp.Literal.string(f"_{i}")) 1007 values.append(e) 1008 1009 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1010 1011 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1012 if expression.args.get("weight") or expression.args.get("accuracy"): 1013 self.unsupported( 1014 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1015 ) 1016 1017 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1018 1019 def alterset_sql(self, expression: exp.AlterSet) -> str: 1020 exprs = self.expressions(expression, flat=True) 1021 exprs = f" {exprs}" if exprs else "" 1022 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1023 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1024 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1025 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1026 tag = self.expressions(expression, key="tag", flat=True) 1027 tag = f" TAG {tag}" if tag else "" 1028 1029 return f"SET{exprs}{file_format}{copy_options}{tag}"
Specifies the strategy according to which identifiers should be normalized.
Default NULL
ordering method to use if not explicitly set.
Possible values: "nulls_are_small"
, "nulls_are_large"
, "nulls_are_last"
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example, WITH y(c) AS ( SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 ) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
Associates this dialect's time formats with their equivalent Python strftime
formats.
230 def quote_identifier(self, expression: E, identify: bool = True) -> E: 231 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 232 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 233 if ( 234 isinstance(expression, exp.Identifier) 235 and isinstance(expression.parent, exp.Table) 236 and expression.name.lower() == "dual" 237 ): 238 return expression # type: ignore 239 240 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an
Identifier
, this method is a no-op. - identify: If set to
False
, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
Mapping of an escaped sequence (\n
) to its unescaped version (
).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- DATE_PART_MAPPING
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
242 class Parser(parser.Parser): 243 IDENTIFY_PIVOT_STRINGS = True 244 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 245 COLON_IS_JSON_EXTRACT = True 246 247 ID_VAR_TOKENS = { 248 *parser.Parser.ID_VAR_TOKENS, 249 TokenType.MATCH_CONDITION, 250 } 251 252 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 253 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 254 255 FUNCTIONS = { 256 **parser.Parser.FUNCTIONS, 257 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 258 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 259 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 260 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 261 this=seq_get(args, 1), expression=seq_get(args, 0) 262 ), 263 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 264 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 265 start=seq_get(args, 0), 266 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 267 step=seq_get(args, 2), 268 ), 269 "BITXOR": binary_from_function(exp.BitwiseXor), 270 "BIT_XOR": binary_from_function(exp.BitwiseXor), 271 "BOOLXOR": binary_from_function(exp.Xor), 272 "CONVERT_TIMEZONE": _build_convert_timezone, 273 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 274 "DATE_TRUNC": _date_trunc_to_time, 275 "DATEADD": _build_date_time_add(exp.DateAdd), 276 "DATEDIFF": _build_datediff, 277 "DIV0": _build_if_from_div0, 278 "FLATTEN": exp.Explode.from_arg_list, 279 "GET_PATH": lambda args, dialect: exp.JSONExtract( 280 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 281 ), 282 "IFF": exp.If.from_arg_list, 283 "LAST_DAY": lambda args: exp.LastDay( 284 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 285 ), 286 "LISTAGG": exp.GroupConcat.from_arg_list, 287 "MEDIAN": lambda args: exp.PercentileCont( 288 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 289 ), 290 "NULLIFZERO": _build_if_from_nullifzero, 291 "OBJECT_CONSTRUCT": _build_object_construct, 292 "REGEXP_REPLACE": _build_regexp_replace, 293 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 294 "RLIKE": exp.RegexpLike.from_arg_list, 295 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 296 "TIMEADD": _build_date_time_add(exp.TimeAdd), 297 "TIMEDIFF": _build_datediff, 298 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 299 "TIMESTAMPDIFF": _build_datediff, 300 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 301 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 302 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 303 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 304 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 305 "TO_NUMBER": lambda args: exp.ToNumber( 306 this=seq_get(args, 0), 307 format=seq_get(args, 1), 308 precision=seq_get(args, 2), 309 scale=seq_get(args, 3), 310 ), 311 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 312 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 313 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 314 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 315 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 316 "TO_VARCHAR": exp.ToChar.from_arg_list, 317 "ZEROIFNULL": _build_if_from_zeroifnull, 318 } 319 320 FUNCTION_PARSERS = { 321 **parser.Parser.FUNCTION_PARSERS, 322 "DATE_PART": lambda self: self._parse_date_part(), 323 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 324 } 325 FUNCTION_PARSERS.pop("TRIM") 326 327 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 328 329 RANGE_PARSERS = { 330 **parser.Parser.RANGE_PARSERS, 331 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 332 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 333 } 334 335 ALTER_PARSERS = { 336 **parser.Parser.ALTER_PARSERS, 337 "UNSET": lambda self: self.expression( 338 exp.Set, 339 tag=self._match_text_seq("TAG"), 340 expressions=self._parse_csv(self._parse_id_var), 341 unset=True, 342 ), 343 "SWAP": lambda self: self._parse_alter_table_swap(), 344 } 345 346 STATEMENT_PARSERS = { 347 **parser.Parser.STATEMENT_PARSERS, 348 TokenType.SHOW: lambda self: self._parse_show(), 349 } 350 351 PROPERTY_PARSERS = { 352 **parser.Parser.PROPERTY_PARSERS, 353 "LOCATION": lambda self: self._parse_location_property(), 354 } 355 356 TYPE_CONVERTERS = { 357 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 358 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 359 } 360 361 SHOW_PARSERS = { 362 "SCHEMAS": _show_parser("SCHEMAS"), 363 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 364 "OBJECTS": _show_parser("OBJECTS"), 365 "TERSE OBJECTS": _show_parser("OBJECTS"), 366 "TABLES": _show_parser("TABLES"), 367 "TERSE TABLES": _show_parser("TABLES"), 368 "VIEWS": _show_parser("VIEWS"), 369 "TERSE VIEWS": _show_parser("VIEWS"), 370 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 371 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 372 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 373 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 374 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 375 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 376 "SEQUENCES": _show_parser("SEQUENCES"), 377 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 378 "COLUMNS": _show_parser("COLUMNS"), 379 "USERS": _show_parser("USERS"), 380 "TERSE USERS": _show_parser("USERS"), 381 } 382 383 CONSTRAINT_PARSERS = { 384 **parser.Parser.CONSTRAINT_PARSERS, 385 "WITH": lambda self: self._parse_with_constraint(), 386 "MASKING": lambda self: self._parse_with_constraint(), 387 "PROJECTION": lambda self: self._parse_with_constraint(), 388 "TAG": lambda self: self._parse_with_constraint(), 389 } 390 391 STAGED_FILE_SINGLE_TOKENS = { 392 TokenType.DOT, 393 TokenType.MOD, 394 TokenType.SLASH, 395 } 396 397 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 398 399 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 400 401 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 402 403 LAMBDAS = { 404 **parser.Parser.LAMBDAS, 405 TokenType.ARROW: lambda self, expressions: self.expression( 406 exp.Lambda, 407 this=self._replace_lambda( 408 self._parse_assignment(), 409 expressions, 410 ), 411 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 412 ), 413 } 414 415 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 416 if self._prev.token_type != TokenType.WITH: 417 self._retreat(self._index - 1) 418 419 if self._match_text_seq("MASKING", "POLICY"): 420 policy = self._parse_column() 421 return self.expression( 422 exp.MaskingPolicyColumnConstraint, 423 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 424 expressions=self._match(TokenType.USING) 425 and self._parse_wrapped_csv(self._parse_id_var), 426 ) 427 if self._match_text_seq("PROJECTION", "POLICY"): 428 policy = self._parse_column() 429 return self.expression( 430 exp.ProjectionPolicyColumnConstraint, 431 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 432 ) 433 if self._match(TokenType.TAG): 434 return self.expression( 435 exp.TagColumnConstraint, 436 expressions=self._parse_wrapped_csv(self._parse_property), 437 ) 438 439 return None 440 441 def _parse_create(self) -> exp.Create | exp.Command: 442 expression = super()._parse_create() 443 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 444 # Replace the Table node with the enclosed Identifier 445 expression.this.replace(expression.this.this) 446 447 return expression 448 449 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 450 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 451 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 452 this = self._parse_var() or self._parse_type() 453 454 if not this: 455 return None 456 457 self._match(TokenType.COMMA) 458 expression = self._parse_bitwise() 459 this = map_date_part(this) 460 name = this.name.upper() 461 462 if name.startswith("EPOCH"): 463 if name == "EPOCH_MILLISECOND": 464 scale = 10**3 465 elif name == "EPOCH_MICROSECOND": 466 scale = 10**6 467 elif name == "EPOCH_NANOSECOND": 468 scale = 10**9 469 else: 470 scale = None 471 472 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 473 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 474 475 if scale: 476 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 477 478 return to_unix 479 480 return self.expression(exp.Extract, this=this, expression=expression) 481 482 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 483 if is_map: 484 # Keys are strings in Snowflake's objects, see also: 485 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 486 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 487 return self._parse_slice(self._parse_string()) 488 489 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 490 491 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 492 lateral = super()._parse_lateral() 493 if not lateral: 494 return lateral 495 496 if isinstance(lateral.this, exp.Explode): 497 table_alias = lateral.args.get("alias") 498 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 499 if table_alias and not table_alias.args.get("columns"): 500 table_alias.set("columns", columns) 501 elif not table_alias: 502 exp.alias_(lateral, "_flattened", table=columns, copy=False) 503 504 return lateral 505 506 def _parse_at_before(self, table: exp.Table) -> exp.Table: 507 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 508 index = self._index 509 if self._match_texts(("AT", "BEFORE")): 510 this = self._prev.text.upper() 511 kind = ( 512 self._match(TokenType.L_PAREN) 513 and self._match_texts(self.HISTORICAL_DATA_KIND) 514 and self._prev.text.upper() 515 ) 516 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 517 518 if expression: 519 self._match_r_paren() 520 when = self.expression( 521 exp.HistoricalData, this=this, kind=kind, expression=expression 522 ) 523 table.set("when", when) 524 else: 525 self._retreat(index) 526 527 return table 528 529 def _parse_table_parts( 530 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 531 ) -> exp.Table: 532 # https://docs.snowflake.com/en/user-guide/querying-stage 533 if self._match(TokenType.STRING, advance=False): 534 table = self._parse_string() 535 elif self._match_text_seq("@", advance=False): 536 table = self._parse_location_path() 537 else: 538 table = None 539 540 if table: 541 file_format = None 542 pattern = None 543 544 wrapped = self._match(TokenType.L_PAREN) 545 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 546 if self._match_text_seq("FILE_FORMAT", "=>"): 547 file_format = self._parse_string() or super()._parse_table_parts( 548 is_db_reference=is_db_reference 549 ) 550 elif self._match_text_seq("PATTERN", "=>"): 551 pattern = self._parse_string() 552 else: 553 break 554 555 self._match(TokenType.COMMA) 556 557 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 558 else: 559 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 560 561 return self._parse_at_before(table) 562 563 def _parse_id_var( 564 self, 565 any_token: bool = True, 566 tokens: t.Optional[t.Collection[TokenType]] = None, 567 ) -> t.Optional[exp.Expression]: 568 if self._match_text_seq("IDENTIFIER", "("): 569 identifier = ( 570 super()._parse_id_var(any_token=any_token, tokens=tokens) 571 or self._parse_string() 572 ) 573 self._match_r_paren() 574 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 575 576 return super()._parse_id_var(any_token=any_token, tokens=tokens) 577 578 def _parse_show_snowflake(self, this: str) -> exp.Show: 579 scope = None 580 scope_kind = None 581 582 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 583 # which is syntactically valid but has no effect on the output 584 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 585 586 history = self._match_text_seq("HISTORY") 587 588 like = self._parse_string() if self._match(TokenType.LIKE) else None 589 590 if self._match(TokenType.IN): 591 if self._match_text_seq("ACCOUNT"): 592 scope_kind = "ACCOUNT" 593 elif self._match_set(self.DB_CREATABLES): 594 scope_kind = self._prev.text.upper() 595 if self._curr: 596 scope = self._parse_table_parts() 597 elif self._curr: 598 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 599 scope = self._parse_table_parts() 600 601 return self.expression( 602 exp.Show, 603 **{ 604 "terse": terse, 605 "this": this, 606 "history": history, 607 "like": like, 608 "scope": scope, 609 "scope_kind": scope_kind, 610 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 611 "limit": self._parse_limit(), 612 "from": self._parse_string() if self._match(TokenType.FROM) else None, 613 }, 614 ) 615 616 def _parse_alter_table_swap(self) -> exp.SwapTable: 617 self._match_text_seq("WITH") 618 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 619 620 def _parse_location_property(self) -> exp.LocationProperty: 621 self._match(TokenType.EQ) 622 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 623 624 def _parse_file_location(self) -> t.Optional[exp.Expression]: 625 # Parse either a subquery or a staged file 626 return ( 627 self._parse_select(table=True, parse_subquery_alias=False) 628 if self._match(TokenType.L_PAREN, advance=False) 629 else self._parse_table_parts() 630 ) 631 632 def _parse_location_path(self) -> exp.Var: 633 parts = [self._advance_any(ignore_reserved=True)] 634 635 # We avoid consuming a comma token because external tables like @foo and @bar 636 # can be joined in a query with a comma separator, as well as closing paren 637 # in case of subqueries 638 while self._is_connected() and not self._match_set( 639 (TokenType.COMMA, TokenType.R_PAREN), advance=False 640 ): 641 parts.append(self._advance_any(ignore_reserved=True)) 642 643 return exp.var("".join(part.text for part in parts if part)) 644 645 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 646 this = super()._parse_lambda_arg() 647 648 if not this: 649 return this 650 651 typ = self._parse_types() 652 653 if typ: 654 return self.expression(exp.Cast, this=this, to=typ) 655 656 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
658 class Tokenizer(tokens.Tokenizer): 659 STRING_ESCAPES = ["\\", "'"] 660 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 661 RAW_STRINGS = ["$$"] 662 COMMENTS = ["--", "//", ("/*", "*/")] 663 664 KEYWORDS = { 665 **tokens.Tokenizer.KEYWORDS, 666 "BYTEINT": TokenType.INT, 667 "CHAR VARYING": TokenType.VARCHAR, 668 "CHARACTER VARYING": TokenType.VARCHAR, 669 "EXCLUDE": TokenType.EXCEPT, 670 "ILIKE ANY": TokenType.ILIKE_ANY, 671 "LIKE ANY": TokenType.LIKE_ANY, 672 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 673 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 674 "MINUS": TokenType.EXCEPT, 675 "NCHAR VARYING": TokenType.VARCHAR, 676 "PUT": TokenType.COMMAND, 677 "REMOVE": TokenType.COMMAND, 678 "RM": TokenType.COMMAND, 679 "SAMPLE": TokenType.TABLE_SAMPLE, 680 "SQL_DOUBLE": TokenType.DOUBLE, 681 "SQL_VARCHAR": TokenType.VARCHAR, 682 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 683 "TAG": TokenType.TAG, 684 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 685 "TOP": TokenType.TOP, 686 "WAREHOUSE": TokenType.WAREHOUSE, 687 "STREAMLIT": TokenType.STREAMLIT, 688 } 689 KEYWORDS.pop("/*+") 690 691 SINGLE_TOKENS = { 692 **tokens.Tokenizer.SINGLE_TOKENS, 693 "$": TokenType.PARAMETER, 694 } 695 696 VAR_SINGLE_TOKENS = {"$"} 697 698 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
700 class Generator(generator.Generator): 701 PARAMETER_TOKEN = "$" 702 MATCHED_BY_SOURCE = False 703 SINGLE_STRING_INTERVAL = True 704 JOIN_HINTS = False 705 TABLE_HINTS = False 706 QUERY_HINTS = False 707 AGGREGATE_FILTER_SUPPORTED = False 708 SUPPORTS_TABLE_COPY = False 709 COLLATE_IS_FUNC = True 710 LIMIT_ONLY_LITERALS = True 711 JSON_KEY_VALUE_PAIR_SEP = "," 712 INSERT_OVERWRITE = " OVERWRITE INTO" 713 STRUCT_DELIMITER = ("(", ")") 714 COPY_PARAMS_ARE_WRAPPED = False 715 COPY_PARAMS_EQ_REQUIRED = True 716 STAR_EXCEPT = "EXCLUDE" 717 718 TRANSFORMS = { 719 **generator.Generator.TRANSFORMS, 720 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 721 exp.ArgMax: rename_func("MAX_BY"), 722 exp.ArgMin: rename_func("MIN_BY"), 723 exp.Array: inline_array_sql, 724 exp.ArrayConcat: rename_func("ARRAY_CAT"), 725 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 726 exp.AtTimeZone: lambda self, e: self.func( 727 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 728 ), 729 exp.BitwiseXor: rename_func("BITXOR"), 730 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 731 exp.DateAdd: date_delta_sql("DATEADD"), 732 exp.DateDiff: date_delta_sql("DATEDIFF"), 733 exp.DateStrToDate: datestrtodate_sql, 734 exp.DayOfMonth: rename_func("DAYOFMONTH"), 735 exp.DayOfWeek: rename_func("DAYOFWEEK"), 736 exp.DayOfYear: rename_func("DAYOFYEAR"), 737 exp.Explode: rename_func("FLATTEN"), 738 exp.Extract: rename_func("DATE_PART"), 739 exp.FromTimeZone: lambda self, e: self.func( 740 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 741 ), 742 exp.GenerateSeries: lambda self, e: self.func( 743 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 744 ), 745 exp.GroupConcat: rename_func("LISTAGG"), 746 exp.If: if_sql(name="IFF", false_value="NULL"), 747 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 748 exp.JSONExtractScalar: lambda self, e: self.func( 749 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 750 ), 751 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 752 exp.JSONPathRoot: lambda *_: "", 753 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 754 exp.LogicalOr: rename_func("BOOLOR_AGG"), 755 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 756 exp.Max: max_or_greatest, 757 exp.Min: min_or_least, 758 exp.ParseJSON: lambda self, e: self.func( 759 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 760 ), 761 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 762 exp.PercentileCont: transforms.preprocess( 763 [transforms.add_within_group_for_percentiles] 764 ), 765 exp.PercentileDisc: transforms.preprocess( 766 [transforms.add_within_group_for_percentiles] 767 ), 768 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 769 exp.RegexpILike: _regexpilike_sql, 770 exp.Rand: rename_func("RANDOM"), 771 exp.Select: transforms.preprocess( 772 [ 773 transforms.eliminate_distinct_on, 774 transforms.explode_to_unnest(), 775 transforms.eliminate_semi_and_anti_joins, 776 ] 777 ), 778 exp.SHA: rename_func("SHA1"), 779 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 780 exp.StartsWith: rename_func("STARTSWITH"), 781 exp.StrPosition: lambda self, e: self.func( 782 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 783 ), 784 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 785 exp.Stuff: rename_func("INSERT"), 786 exp.TimeAdd: date_delta_sql("TIMEADD"), 787 exp.TimestampDiff: lambda self, e: self.func( 788 "TIMESTAMPDIFF", e.unit, e.expression, e.this 789 ), 790 exp.TimestampTrunc: timestamptrunc_sql(), 791 exp.TimeStrToTime: timestrtotime_sql, 792 exp.TimeToStr: lambda self, e: self.func( 793 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 794 ), 795 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 796 exp.ToArray: rename_func("TO_ARRAY"), 797 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 798 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 799 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 800 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 801 exp.TsOrDsToDate: lambda self, e: self.func( 802 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 803 ), 804 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 805 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 806 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 807 exp.Xor: rename_func("BOOLXOR"), 808 } 809 810 SUPPORTED_JSON_PATH_PARTS = { 811 exp.JSONPathKey, 812 exp.JSONPathRoot, 813 exp.JSONPathSubscript, 814 } 815 816 TYPE_MAPPING = { 817 **generator.Generator.TYPE_MAPPING, 818 exp.DataType.Type.NESTED: "OBJECT", 819 exp.DataType.Type.STRUCT: "OBJECT", 820 } 821 822 PROPERTIES_LOCATION = { 823 **generator.Generator.PROPERTIES_LOCATION, 824 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 825 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 826 } 827 828 UNSUPPORTED_VALUES_EXPRESSIONS = { 829 exp.Map, 830 exp.StarMap, 831 exp.Struct, 832 exp.VarMap, 833 } 834 835 def with_properties(self, properties: exp.Properties) -> str: 836 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 837 838 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 839 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 840 values_as_table = False 841 842 return super().values_sql(expression, values_as_table=values_as_table) 843 844 def datatype_sql(self, expression: exp.DataType) -> str: 845 expressions = expression.expressions 846 if ( 847 expressions 848 and expression.is_type(*exp.DataType.STRUCT_TYPES) 849 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 850 ): 851 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 852 return "OBJECT" 853 854 return super().datatype_sql(expression) 855 856 def tonumber_sql(self, expression: exp.ToNumber) -> str: 857 return self.func( 858 "TO_NUMBER", 859 expression.this, 860 expression.args.get("format"), 861 expression.args.get("precision"), 862 expression.args.get("scale"), 863 ) 864 865 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 866 milli = expression.args.get("milli") 867 if milli is not None: 868 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 869 expression.set("nano", milli_to_nano) 870 871 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 872 873 def trycast_sql(self, expression: exp.TryCast) -> str: 874 value = expression.this 875 876 if value.type is None: 877 from sqlglot.optimizer.annotate_types import annotate_types 878 879 value = annotate_types(value) 880 881 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 882 return super().trycast_sql(expression) 883 884 # TRY_CAST only works for string values in Snowflake 885 return self.cast_sql(expression) 886 887 def log_sql(self, expression: exp.Log) -> str: 888 if not expression.expression: 889 return self.func("LN", expression.this) 890 891 return super().log_sql(expression) 892 893 def unnest_sql(self, expression: exp.Unnest) -> str: 894 unnest_alias = expression.args.get("alias") 895 offset = expression.args.get("offset") 896 897 columns = [ 898 exp.to_identifier("seq"), 899 exp.to_identifier("key"), 900 exp.to_identifier("path"), 901 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 902 seq_get(unnest_alias.columns if unnest_alias else [], 0) 903 or exp.to_identifier("value"), 904 exp.to_identifier("this"), 905 ] 906 907 if unnest_alias: 908 unnest_alias.set("columns", columns) 909 else: 910 unnest_alias = exp.TableAlias(this="_u", columns=columns) 911 912 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 913 alias = self.sql(unnest_alias) 914 alias = f" AS {alias}" if alias else "" 915 return f"{explode}{alias}" 916 917 def show_sql(self, expression: exp.Show) -> str: 918 terse = "TERSE " if expression.args.get("terse") else "" 919 history = " HISTORY" if expression.args.get("history") else "" 920 like = self.sql(expression, "like") 921 like = f" LIKE {like}" if like else "" 922 923 scope = self.sql(expression, "scope") 924 scope = f" {scope}" if scope else "" 925 926 scope_kind = self.sql(expression, "scope_kind") 927 if scope_kind: 928 scope_kind = f" IN {scope_kind}" 929 930 starts_with = self.sql(expression, "starts_with") 931 if starts_with: 932 starts_with = f" STARTS WITH {starts_with}" 933 934 limit = self.sql(expression, "limit") 935 936 from_ = self.sql(expression, "from") 937 if from_: 938 from_ = f" FROM {from_}" 939 940 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 941 942 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 943 # Other dialects don't support all of the following parameters, so we need to 944 # generate default values as necessary to ensure the transpilation is correct 945 group = expression.args.get("group") 946 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 947 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 948 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 949 950 return self.func( 951 "REGEXP_SUBSTR", 952 expression.this, 953 expression.expression, 954 position, 955 occurrence, 956 parameters, 957 group, 958 ) 959 960 def except_op(self, expression: exp.Except) -> str: 961 if not expression.args.get("distinct"): 962 self.unsupported("EXCEPT with All is not supported in Snowflake") 963 return super().except_op(expression) 964 965 def intersect_op(self, expression: exp.Intersect) -> str: 966 if not expression.args.get("distinct"): 967 self.unsupported("INTERSECT with All is not supported in Snowflake") 968 return super().intersect_op(expression) 969 970 def describe_sql(self, expression: exp.Describe) -> str: 971 # Default to table if kind is unknown 972 kind_value = expression.args.get("kind") or "TABLE" 973 kind = f" {kind_value}" if kind_value else "" 974 this = f" {self.sql(expression, 'this')}" 975 expressions = self.expressions(expression, flat=True) 976 expressions = f" {expressions}" if expressions else "" 977 return f"DESCRIBE{kind}{this}{expressions}" 978 979 def generatedasidentitycolumnconstraint_sql( 980 self, expression: exp.GeneratedAsIdentityColumnConstraint 981 ) -> str: 982 start = expression.args.get("start") 983 start = f" START {start}" if start else "" 984 increment = expression.args.get("increment") 985 increment = f" INCREMENT {increment}" if increment else "" 986 return f"AUTOINCREMENT{start}{increment}" 987 988 def swaptable_sql(self, expression: exp.SwapTable) -> str: 989 this = self.sql(expression, "this") 990 return f"SWAP WITH {this}" 991 992 def cluster_sql(self, expression: exp.Cluster) -> str: 993 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 994 995 def struct_sql(self, expression: exp.Struct) -> str: 996 keys = [] 997 values = [] 998 999 for i, e in enumerate(expression.expressions): 1000 if isinstance(e, exp.PropertyEQ): 1001 keys.append( 1002 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1003 ) 1004 values.append(e.expression) 1005 else: 1006 keys.append(exp.Literal.string(f"_{i}")) 1007 values.append(e) 1008 1009 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1010 1011 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1012 if expression.args.get("weight") or expression.args.get("accuracy"): 1013 self.unsupported( 1014 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1015 ) 1016 1017 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1018 1019 def alterset_sql(self, expression: exp.AlterSet) -> str: 1020 exprs = self.expressions(expression, flat=True) 1021 exprs = f" {exprs}" if exprs else "" 1022 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1023 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1024 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1025 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1026 tag = self.expressions(expression, key="tag", flat=True) 1027 tag = f" TAG {tag}" if tag else "" 1028 1029 return f"SET{exprs}{file_format}{copy_options}{tag}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHERE
clause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
844 def datatype_sql(self, expression: exp.DataType) -> str: 845 expressions = expression.expressions 846 if ( 847 expressions 848 and expression.is_type(*exp.DataType.STRUCT_TYPES) 849 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 850 ): 851 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 852 return "OBJECT" 853 854 return super().datatype_sql(expression)
865 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 866 milli = expression.args.get("milli") 867 if milli is not None: 868 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 869 expression.set("nano", milli_to_nano) 870 871 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
873 def trycast_sql(self, expression: exp.TryCast) -> str: 874 value = expression.this 875 876 if value.type is None: 877 from sqlglot.optimizer.annotate_types import annotate_types 878 879 value = annotate_types(value) 880 881 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 882 return super().trycast_sql(expression) 883 884 # TRY_CAST only works for string values in Snowflake 885 return self.cast_sql(expression)
893 def unnest_sql(self, expression: exp.Unnest) -> str: 894 unnest_alias = expression.args.get("alias") 895 offset = expression.args.get("offset") 896 897 columns = [ 898 exp.to_identifier("seq"), 899 exp.to_identifier("key"), 900 exp.to_identifier("path"), 901 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 902 seq_get(unnest_alias.columns if unnest_alias else [], 0) 903 or exp.to_identifier("value"), 904 exp.to_identifier("this"), 905 ] 906 907 if unnest_alias: 908 unnest_alias.set("columns", columns) 909 else: 910 unnest_alias = exp.TableAlias(this="_u", columns=columns) 911 912 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 913 alias = self.sql(unnest_alias) 914 alias = f" AS {alias}" if alias else "" 915 return f"{explode}{alias}"
917 def show_sql(self, expression: exp.Show) -> str: 918 terse = "TERSE " if expression.args.get("terse") else "" 919 history = " HISTORY" if expression.args.get("history") else "" 920 like = self.sql(expression, "like") 921 like = f" LIKE {like}" if like else "" 922 923 scope = self.sql(expression, "scope") 924 scope = f" {scope}" if scope else "" 925 926 scope_kind = self.sql(expression, "scope_kind") 927 if scope_kind: 928 scope_kind = f" IN {scope_kind}" 929 930 starts_with = self.sql(expression, "starts_with") 931 if starts_with: 932 starts_with = f" STARTS WITH {starts_with}" 933 934 limit = self.sql(expression, "limit") 935 936 from_ = self.sql(expression, "from") 937 if from_: 938 from_ = f" FROM {from_}" 939 940 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
942 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 943 # Other dialects don't support all of the following parameters, so we need to 944 # generate default values as necessary to ensure the transpilation is correct 945 group = expression.args.get("group") 946 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 947 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 948 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 949 950 return self.func( 951 "REGEXP_SUBSTR", 952 expression.this, 953 expression.expression, 954 position, 955 occurrence, 956 parameters, 957 group, 958 )
970 def describe_sql(self, expression: exp.Describe) -> str: 971 # Default to table if kind is unknown 972 kind_value = expression.args.get("kind") or "TABLE" 973 kind = f" {kind_value}" if kind_value else "" 974 this = f" {self.sql(expression, 'this')}" 975 expressions = self.expressions(expression, flat=True) 976 expressions = f" {expressions}" if expressions else "" 977 return f"DESCRIBE{kind}{this}{expressions}"
979 def generatedasidentitycolumnconstraint_sql( 980 self, expression: exp.GeneratedAsIdentityColumnConstraint 981 ) -> str: 982 start = expression.args.get("start") 983 start = f" START {start}" if start else "" 984 increment = expression.args.get("increment") 985 increment = f" INCREMENT {increment}" if increment else "" 986 return f"AUTOINCREMENT{start}{increment}"
995 def struct_sql(self, expression: exp.Struct) -> str: 996 keys = [] 997 values = [] 998 999 for i, e in enumerate(expression.expressions): 1000 if isinstance(e, exp.PropertyEQ): 1001 keys.append( 1002 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1003 ) 1004 values.append(e.expression) 1005 else: 1006 keys.append(exp.Literal.string(f"_{i}")) 1007 values.append(e) 1008 1009 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
1011 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1012 if expression.args.get("weight") or expression.args.get("accuracy"): 1013 self.unsupported( 1014 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1015 ) 1016 1017 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))
1019 def alterset_sql(self, expression: exp.AlterSet) -> str: 1020 exprs = self.expressions(expression, flat=True) 1021 exprs = f" {exprs}" if exprs else "" 1022 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1023 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1024 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1025 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1026 tag = self.expressions(expression, key="tag", flat=True) 1027 tag = f" TAG {tag}" if tag else "" 1028 1029 return f"SET{exprs}{file_format}{copy_options}{tag}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_SET_OP
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql