# sqlglot/dialects/presto.py -- Presto/Trino dialect for sqlglot
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, parser, tokens, transforms 6from sqlglot.dialects.dialect import ( 7 Dialect, 8 NormalizationStrategy, 9 binary_from_function, 10 bool_xor_sql, 11 date_trunc_to_time, 12 datestrtodate_sql, 13 encode_decode_sql, 14 build_formatted_time, 15 if_sql, 16 left_to_substring_sql, 17 no_ilike_sql, 18 no_pivot_sql, 19 no_safe_divide_sql, 20 no_timestamp_sql, 21 regexp_extract_sql, 22 rename_func, 23 right_to_substring_sql, 24 sha256_sql, 25 struct_extract_sql, 26 str_position_sql, 27 timestamptrunc_sql, 28 timestrtotime_sql, 29 ts_or_ds_add_cast, 30 unit_to_str, 31) 32from sqlglot.dialects.hive import Hive 33from sqlglot.dialects.mysql import MySQL 34from sqlglot.helper import apply_index_offset, seq_get 35from sqlglot.tokens import TokenType 36from sqlglot.transforms import unqualify_columns 37 38 39def _explode_to_unnest_sql(self: Presto.Generator, expression: exp.Lateral) -> str: 40 if isinstance(expression.this, exp.Explode): 41 return self.sql( 42 exp.Join( 43 this=exp.Unnest( 44 expressions=[expression.this.this], 45 alias=expression.args.get("alias"), 46 offset=isinstance(expression.this, exp.Posexplode), 47 ), 48 kind="cross", 49 ) 50 ) 51 return self.lateral_sql(expression) 52 53 54def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str: 55 regex = r"(\w)(\w*)" 56 return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))" 57 58 59def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str: 60 if expression.args.get("asc") == exp.false(): 61 comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END" 62 else: 63 comparator = None 64 return self.func("ARRAY_SORT", expression.this, comparator) 65 66 67def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str: 68 if isinstance(expression.parent, exp.Property): 69 columns = ", ".join(f"'{c.name}'" for c in 
expression.expressions) 70 return f"ARRAY[{columns}]" 71 72 if expression.parent: 73 for schema in expression.parent.find_all(exp.Schema): 74 column_defs = schema.find_all(exp.ColumnDef) 75 if column_defs and isinstance(schema.parent, exp.Property): 76 expression.expressions.extend(column_defs) 77 78 return self.schema_sql(expression) 79 80 81def _quantile_sql(self: Presto.Generator, expression: exp.Quantile) -> str: 82 self.unsupported("Presto does not support exact quantiles") 83 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 84 85 86def _str_to_time_sql( 87 self: Presto.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate 88) -> str: 89 return self.func("DATE_PARSE", expression.this, self.format_time(expression)) 90 91 92def _ts_or_ds_to_date_sql(self: Presto.Generator, expression: exp.TsOrDsToDate) -> str: 93 time_format = self.format_time(expression) 94 if time_format and time_format not in (Presto.TIME_FORMAT, Presto.DATE_FORMAT): 95 return self.sql(exp.cast(_str_to_time_sql(self, expression), exp.DataType.Type.DATE)) 96 return self.sql( 97 exp.cast(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), exp.DataType.Type.DATE) 98 ) 99 100 101def _ts_or_ds_add_sql(self: Presto.Generator, expression: exp.TsOrDsAdd) -> str: 102 expression = ts_or_ds_add_cast(expression) 103 unit = unit_to_str(expression) 104 return self.func("DATE_ADD", unit, expression.expression, expression.this) 105 106 107def _ts_or_ds_diff_sql(self: Presto.Generator, expression: exp.TsOrDsDiff) -> str: 108 this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP) 109 expr = exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP) 110 unit = unit_to_str(expression) 111 return self.func("DATE_DIFF", unit, expr, this) 112 113 114def _build_approx_percentile(args: t.List) -> exp.Expression: 115 if len(args) == 4: 116 return exp.ApproxQuantile( 117 this=seq_get(args, 0), 118 weight=seq_get(args, 1), 119 quantile=seq_get(args, 
2), 120 accuracy=seq_get(args, 3), 121 ) 122 if len(args) == 3: 123 return exp.ApproxQuantile( 124 this=seq_get(args, 0), quantile=seq_get(args, 1), accuracy=seq_get(args, 2) 125 ) 126 return exp.ApproxQuantile.from_arg_list(args) 127 128 129def _build_from_unixtime(args: t.List) -> exp.Expression: 130 if len(args) == 3: 131 return exp.UnixToTime( 132 this=seq_get(args, 0), 133 hours=seq_get(args, 1), 134 minutes=seq_get(args, 2), 135 ) 136 if len(args) == 2: 137 return exp.UnixToTime(this=seq_get(args, 0), zone=seq_get(args, 1)) 138 139 return exp.UnixToTime.from_arg_list(args) 140 141 142def _unnest_sequence(expression: exp.Expression) -> exp.Expression: 143 if isinstance(expression, exp.Table): 144 if isinstance(expression.this, exp.GenerateSeries): 145 unnest = exp.Unnest(expressions=[expression.this]) 146 147 if expression.alias: 148 return exp.alias_(unnest, alias="_u", table=[expression.alias], copy=False) 149 return unnest 150 return expression 151 152 153def _first_last_sql(self: Presto.Generator, expression: exp.Func) -> str: 154 """ 155 Trino doesn't support FIRST / LAST as functions, but they're valid in the context 156 of MATCH_RECOGNIZE, so we need to preserve them in that case. In all other cases 157 they're converted into an ARBITRARY call. 
158 159 Reference: https://trino.io/docs/current/sql/match-recognize.html#logical-navigation-functions 160 """ 161 if isinstance(expression.find_ancestor(exp.MatchRecognize, exp.Select), exp.MatchRecognize): 162 return self.function_fallback_sql(expression) 163 164 return rename_func("ARBITRARY")(self, expression) 165 166 167def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str: 168 scale = expression.args.get("scale") 169 timestamp = self.sql(expression, "this") 170 if scale in (None, exp.UnixToTime.SECONDS): 171 return rename_func("FROM_UNIXTIME")(self, expression) 172 173 return f"FROM_UNIXTIME(CAST({timestamp} AS DOUBLE) / POW(10, {scale}))" 174 175 176def _to_int(expression: exp.Expression) -> exp.Expression: 177 if not expression.type: 178 from sqlglot.optimizer.annotate_types import annotate_types 179 180 annotate_types(expression) 181 if expression.type and expression.type.this not in exp.DataType.INTEGER_TYPES: 182 return exp.cast(expression, to=exp.DataType.Type.BIGINT) 183 return expression 184 185 186def _build_to_char(args: t.List) -> exp.TimeToStr: 187 fmt = seq_get(args, 1) 188 if isinstance(fmt, exp.Literal): 189 # We uppercase this to match Teradata's format mapping keys 190 fmt.set("this", fmt.this.upper()) 191 192 # We use "teradata" on purpose here, because the time formats are different in Presto. 
193 # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char 194 return build_formatted_time(exp.TimeToStr, "teradata")(args) 195 196 197class Presto(Dialect): 198 INDEX_OFFSET = 1 199 NULL_ORDERING = "nulls_are_last" 200 TIME_FORMAT = MySQL.TIME_FORMAT 201 TIME_MAPPING = MySQL.TIME_MAPPING 202 STRICT_STRING_CONCAT = True 203 SUPPORTS_SEMI_ANTI_JOIN = False 204 TYPED_DIVISION = True 205 TABLESAMPLE_SIZE_IS_PERCENT = True 206 LOG_BASE_FIRST: t.Optional[bool] = None 207 208 # https://github.com/trinodb/trino/issues/17 209 # https://github.com/trinodb/trino/issues/12289 210 # https://github.com/prestodb/presto/issues/2863 211 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 212 213 class Tokenizer(tokens.Tokenizer): 214 UNICODE_STRINGS = [ 215 (prefix + q, q) 216 for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) 217 for prefix in ("U&", "u&") 218 ] 219 220 KEYWORDS = { 221 **tokens.Tokenizer.KEYWORDS, 222 "START": TokenType.BEGIN, 223 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 224 "ROW": TokenType.STRUCT, 225 "IPADDRESS": TokenType.IPADDRESS, 226 "IPPREFIX": TokenType.IPPREFIX, 227 "TDIGEST": TokenType.TDIGEST, 228 "HYPERLOGLOG": TokenType.HLLSKETCH, 229 } 230 KEYWORDS.pop("/*+") 231 KEYWORDS.pop("QUALIFY") 232 233 class Parser(parser.Parser): 234 VALUES_FOLLOWED_BY_PAREN = False 235 236 FUNCTIONS = { 237 **parser.Parser.FUNCTIONS, 238 "ARBITRARY": exp.AnyValue.from_arg_list, 239 "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list, 240 "APPROX_PERCENTILE": _build_approx_percentile, 241 "BITWISE_AND": binary_from_function(exp.BitwiseAnd), 242 "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 243 "BITWISE_OR": binary_from_function(exp.BitwiseOr), 244 "BITWISE_XOR": binary_from_function(exp.BitwiseXor), 245 "CARDINALITY": exp.ArraySize.from_arg_list, 246 "CONTAINS": exp.ArrayContains.from_arg_list, 247 "DATE_ADD": lambda args: exp.DateAdd( 248 this=seq_get(args, 2), expression=seq_get(args, 1), 
unit=seq_get(args, 0) 249 ), 250 "DATE_DIFF": lambda args: exp.DateDiff( 251 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 252 ), 253 "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"), 254 "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"), 255 "DATE_TRUNC": date_trunc_to_time, 256 "ELEMENT_AT": lambda args: exp.Bracket( 257 this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True 258 ), 259 "FROM_HEX": exp.Unhex.from_arg_list, 260 "FROM_UNIXTIME": _build_from_unixtime, 261 "FROM_UTF8": lambda args: exp.Decode( 262 this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8") 263 ), 264 "NOW": exp.CurrentTimestamp.from_arg_list, 265 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 266 this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2) 267 ), 268 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 269 this=seq_get(args, 0), 270 expression=seq_get(args, 1), 271 replacement=seq_get(args, 2) or exp.Literal.string(""), 272 ), 273 "ROW": exp.Struct.from_arg_list, 274 "SEQUENCE": exp.GenerateSeries.from_arg_list, 275 "SET_AGG": exp.ArrayUniqueAgg.from_arg_list, 276 "SPLIT_TO_MAP": exp.StrToMap.from_arg_list, 277 "STRPOS": lambda args: exp.StrPosition( 278 this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2) 279 ), 280 "TO_CHAR": _build_to_char, 281 "TO_UNIXTIME": exp.TimeToUnix.from_arg_list, 282 "TO_UTF8": lambda args: exp.Encode( 283 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 284 ), 285 "MD5": exp.MD5Digest.from_arg_list, 286 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 287 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 288 } 289 290 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 291 FUNCTION_PARSERS.pop("TRIM") 292 293 class Generator(generator.Generator): 294 INTERVAL_ALLOWS_PLURAL_FORM = False 295 JOIN_HINTS = False 296 
TABLE_HINTS = False 297 QUERY_HINTS = False 298 IS_BOOL_ALLOWED = False 299 TZ_TO_WITH_TIME_ZONE = True 300 NVL2_SUPPORTED = False 301 STRUCT_DELIMITER = ("(", ")") 302 LIMIT_ONLY_LITERALS = True 303 SUPPORTS_SINGLE_ARG_CONCAT = False 304 LIKE_PROPERTY_INSIDE_SCHEMA = True 305 MULTI_ARG_DISTINCT = False 306 SUPPORTS_TO_NUMBER = False 307 HEX_FUNC = "TO_HEX" 308 309 PROPERTIES_LOCATION = { 310 **generator.Generator.PROPERTIES_LOCATION, 311 exp.LocationProperty: exp.Properties.Location.UNSUPPORTED, 312 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 313 } 314 315 TYPE_MAPPING = { 316 **generator.Generator.TYPE_MAPPING, 317 exp.DataType.Type.INT: "INTEGER", 318 exp.DataType.Type.FLOAT: "REAL", 319 exp.DataType.Type.BINARY: "VARBINARY", 320 exp.DataType.Type.TEXT: "VARCHAR", 321 exp.DataType.Type.TIMETZ: "TIME", 322 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 323 exp.DataType.Type.STRUCT: "ROW", 324 exp.DataType.Type.DATETIME: "TIMESTAMP", 325 exp.DataType.Type.DATETIME64: "TIMESTAMP", 326 exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG", 327 } 328 329 TRANSFORMS = { 330 **generator.Generator.TRANSFORMS, 331 exp.AnyValue: rename_func("ARBITRARY"), 332 exp.ApproxDistinct: lambda self, e: self.func( 333 "APPROX_DISTINCT", e.this, e.args.get("accuracy") 334 ), 335 exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"), 336 exp.ArgMax: rename_func("MAX_BY"), 337 exp.ArgMin: rename_func("MIN_BY"), 338 exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", 339 exp.ArrayAny: rename_func("ANY_MATCH"), 340 exp.ArrayConcat: rename_func("CONCAT"), 341 exp.ArrayContains: rename_func("CONTAINS"), 342 exp.ArraySize: rename_func("CARDINALITY"), 343 exp.ArrayToString: rename_func("ARRAY_JOIN"), 344 exp.ArrayUniqueAgg: rename_func("SET_AGG"), 345 exp.AtTimeZone: rename_func("AT_TIMEZONE"), 346 exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression), 347 exp.BitwiseLeftShift: lambda self, e: self.func( 348 "BITWISE_ARITHMETIC_SHIFT_LEFT", 
e.this, e.expression 349 ), 350 exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this), 351 exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression), 352 exp.BitwiseRightShift: lambda self, e: self.func( 353 "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression 354 ), 355 exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression), 356 exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), 357 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 358 exp.DateAdd: lambda self, e: self.func( 359 "DATE_ADD", 360 unit_to_str(e), 361 _to_int(e.expression), 362 e.this, 363 ), 364 exp.DateDiff: lambda self, e: self.func( 365 "DATE_DIFF", unit_to_str(e), e.expression, e.this 366 ), 367 exp.DateStrToDate: datestrtodate_sql, 368 exp.DateToDi: lambda self, 369 e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)", 370 exp.DateSub: lambda self, e: self.func( 371 "DATE_ADD", 372 unit_to_str(e), 373 _to_int(e.expression * -1), 374 e.this, 375 ), 376 exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), 377 exp.DiToDate: lambda self, 378 e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", 379 exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"), 380 exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'", 381 exp.First: _first_last_sql, 382 exp.FirstValue: _first_last_sql, 383 exp.FromTimeZone: lambda self, 384 e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'", 385 exp.Group: transforms.preprocess([transforms.unalias_group]), 386 exp.GroupConcat: lambda self, e: self.func( 387 "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator") 388 ), 389 exp.If: if_sql(), 390 exp.ILike: no_ilike_sql, 391 exp.Initcap: _initcap_sql, 392 exp.ParseJSON: rename_func("JSON_PARSE"), 393 exp.Last: _first_last_sql, 394 exp.LastValue: _first_last_sql, 395 exp.LastDay: lambda self, e: 
self.func("LAST_DAY_OF_MONTH", e.this), 396 exp.Lateral: _explode_to_unnest_sql, 397 exp.Left: left_to_substring_sql, 398 exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"), 399 exp.LogicalAnd: rename_func("BOOL_AND"), 400 exp.LogicalOr: rename_func("BOOL_OR"), 401 exp.Pivot: no_pivot_sql, 402 exp.Quantile: _quantile_sql, 403 exp.RegexpExtract: regexp_extract_sql, 404 exp.Right: right_to_substring_sql, 405 exp.SafeDivide: no_safe_divide_sql, 406 exp.Schema: _schema_sql, 407 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 408 exp.Select: transforms.preprocess( 409 [ 410 transforms.eliminate_qualify, 411 transforms.eliminate_distinct_on, 412 transforms.explode_to_unnest(1), 413 transforms.eliminate_semi_and_anti_joins, 414 ] 415 ), 416 exp.SortArray: _no_sort_array, 417 exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True), 418 exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", 419 exp.StrToMap: rename_func("SPLIT_TO_MAP"), 420 exp.StrToTime: _str_to_time_sql, 421 exp.StructExtract: struct_extract_sql, 422 exp.Table: transforms.preprocess([_unnest_sequence]), 423 exp.Timestamp: no_timestamp_sql, 424 exp.TimestampTrunc: timestamptrunc_sql(), 425 exp.TimeStrToDate: timestrtotime_sql, 426 exp.TimeStrToTime: timestrtotime_sql, 427 exp.TimeStrToUnix: lambda self, e: self.func( 428 "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT) 429 ), 430 exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 431 exp.TimeToUnix: rename_func("TO_UNIXTIME"), 432 exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 433 exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), 434 exp.TsOrDiToDi: lambda self, 435 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", 436 exp.TsOrDsAdd: _ts_or_ds_add_sql, 437 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 438 exp.TsOrDsToDate: _ts_or_ds_to_date_sql, 439 
exp.Unhex: rename_func("FROM_HEX"), 440 exp.UnixToStr: lambda self, 441 e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})", 442 exp.UnixToTime: _unix_to_time_sql, 443 exp.UnixToTimeStr: lambda self, 444 e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)", 445 exp.VariancePop: rename_func("VAR_POP"), 446 exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]), 447 exp.WithinGroup: transforms.preprocess( 448 [transforms.remove_within_group_for_percentiles] 449 ), 450 exp.Xor: bool_xor_sql, 451 exp.MD5: lambda self, e: self.func( 452 "LOWER", self.func("TO_HEX", self.func("MD5", self.sql(e, "this"))) 453 ), 454 exp.MD5Digest: rename_func("MD5"), 455 exp.SHA: rename_func("SHA1"), 456 exp.SHA2: sha256_sql, 457 } 458 459 RESERVED_KEYWORDS = { 460 "alter", 461 "and", 462 "as", 463 "between", 464 "by", 465 "case", 466 "cast", 467 "constraint", 468 "create", 469 "cross", 470 "current_time", 471 "current_timestamp", 472 "deallocate", 473 "delete", 474 "describe", 475 "distinct", 476 "drop", 477 "else", 478 "end", 479 "escape", 480 "except", 481 "execute", 482 "exists", 483 "extract", 484 "false", 485 "for", 486 "from", 487 "full", 488 "group", 489 "having", 490 "in", 491 "inner", 492 "insert", 493 "intersect", 494 "into", 495 "is", 496 "join", 497 "left", 498 "like", 499 "natural", 500 "not", 501 "null", 502 "on", 503 "or", 504 "order", 505 "outer", 506 "prepare", 507 "right", 508 "select", 509 "table", 510 "then", 511 "true", 512 "union", 513 "using", 514 "values", 515 "when", 516 "where", 517 "with", 518 } 519 520 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 521 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 
522 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 523 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 524 # which seems to be using the same time mapping as Hive, as per: 525 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 526 value_as_text = exp.cast(expression.this, exp.DataType.Type.TEXT) 527 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 528 parse_with_tz = self.func( 529 "PARSE_DATETIME", 530 value_as_text, 531 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 532 ) 533 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 534 return self.func("TO_UNIXTIME", coalesced) 535 536 def bracket_sql(self, expression: exp.Bracket) -> str: 537 if expression.args.get("safe"): 538 return self.func( 539 "ELEMENT_AT", 540 expression.this, 541 seq_get( 542 apply_index_offset( 543 expression.this, 544 expression.expressions, 545 1 - expression.args.get("offset", 0), 546 ), 547 0, 548 ), 549 ) 550 return super().bracket_sql(expression) 551 552 def struct_sql(self, expression: exp.Struct) -> str: 553 from sqlglot.optimizer.annotate_types import annotate_types 554 555 expression = annotate_types(expression) 556 values: t.List[str] = [] 557 schema: t.List[str] = [] 558 unknown_type = False 559 560 for e in expression.expressions: 561 if isinstance(e, exp.PropertyEQ): 562 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 563 unknown_type = True 564 else: 565 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 566 values.append(self.sql(e, "expression")) 567 else: 568 values.append(self.sql(e)) 569 570 size = len(expression.expressions) 571 572 if not size or len(schema) != size: 573 if unknown_type: 574 self.unsupported( 575 "Cannot convert untyped key-value definitions (try annotate_types)." 
576 ) 577 return self.func("ROW", *values) 578 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))" 579 580 def interval_sql(self, expression: exp.Interval) -> str: 581 if expression.this and expression.text("unit").upper().startswith("WEEK"): 582 return f"({expression.this.name} * INTERVAL '7' DAY)" 583 return super().interval_sql(expression) 584 585 def transaction_sql(self, expression: exp.Transaction) -> str: 586 modes = expression.args.get("modes") 587 modes = f" {', '.join(modes)}" if modes else "" 588 return f"START TRANSACTION{modes}" 589 590 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 591 start = expression.args["start"] 592 end = expression.args["end"] 593 step = expression.args.get("step") 594 595 if isinstance(start, exp.Cast): 596 target_type = start.to 597 elif isinstance(end, exp.Cast): 598 target_type = end.to 599 else: 600 target_type = None 601 602 if target_type and target_type.is_type("timestamp"): 603 if target_type is start.to: 604 end = exp.cast(end, target_type) 605 else: 606 start = exp.cast(start, target_type) 607 608 return self.func("SEQUENCE", start, end, step) 609 610 def offset_limit_modifiers( 611 self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit] 612 ) -> t.List[str]: 613 return [ 614 self.sql(expression, "offset"), 615 self.sql(limit), 616 ] 617 618 def create_sql(self, expression: exp.Create) -> str: 619 """ 620 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 621 so we need to remove them 622 """ 623 kind = expression.args["kind"] 624 schema = expression.this 625 if kind == "VIEW" and schema.expressions: 626 expression.this.set("expressions", None) 627 return super().create_sql(expression) 628 629 def delete_sql(self, expression: exp.Delete) -> str: 630 """ 631 Presto only supports DELETE FROM for a single table without an alias, so we need 632 to remove the unnecessary parts. 
If the original DELETE statement contains more 633 than one table to be deleted, we can't safely map it 1-1 to a Presto statement. 634 """ 635 tables = expression.args.get("tables") or [expression.this] 636 if len(tables) > 1: 637 return super().delete_sql(expression) 638 639 table = tables[0] 640 expression.set("this", table) 641 expression.set("tables", None) 642 643 if isinstance(table, exp.Table): 644 table_alias = table.args.get("alias") 645 if table_alias: 646 table_alias.pop() 647 expression = t.cast(exp.Delete, expression.transform(unqualify_columns)) 648 649 return super().delete_sql(expression)
198class Presto(Dialect): 199 INDEX_OFFSET = 1 200 NULL_ORDERING = "nulls_are_last" 201 TIME_FORMAT = MySQL.TIME_FORMAT 202 TIME_MAPPING = MySQL.TIME_MAPPING 203 STRICT_STRING_CONCAT = True 204 SUPPORTS_SEMI_ANTI_JOIN = False 205 TYPED_DIVISION = True 206 TABLESAMPLE_SIZE_IS_PERCENT = True 207 LOG_BASE_FIRST: t.Optional[bool] = None 208 209 # https://github.com/trinodb/trino/issues/17 210 # https://github.com/trinodb/trino/issues/12289 211 # https://github.com/prestodb/presto/issues/2863 212 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 213 214 class Tokenizer(tokens.Tokenizer): 215 UNICODE_STRINGS = [ 216 (prefix + q, q) 217 for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) 218 for prefix in ("U&", "u&") 219 ] 220 221 KEYWORDS = { 222 **tokens.Tokenizer.KEYWORDS, 223 "START": TokenType.BEGIN, 224 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 225 "ROW": TokenType.STRUCT, 226 "IPADDRESS": TokenType.IPADDRESS, 227 "IPPREFIX": TokenType.IPPREFIX, 228 "TDIGEST": TokenType.TDIGEST, 229 "HYPERLOGLOG": TokenType.HLLSKETCH, 230 } 231 KEYWORDS.pop("/*+") 232 KEYWORDS.pop("QUALIFY") 233 234 class Parser(parser.Parser): 235 VALUES_FOLLOWED_BY_PAREN = False 236 237 FUNCTIONS = { 238 **parser.Parser.FUNCTIONS, 239 "ARBITRARY": exp.AnyValue.from_arg_list, 240 "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list, 241 "APPROX_PERCENTILE": _build_approx_percentile, 242 "BITWISE_AND": binary_from_function(exp.BitwiseAnd), 243 "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 244 "BITWISE_OR": binary_from_function(exp.BitwiseOr), 245 "BITWISE_XOR": binary_from_function(exp.BitwiseXor), 246 "CARDINALITY": exp.ArraySize.from_arg_list, 247 "CONTAINS": exp.ArrayContains.from_arg_list, 248 "DATE_ADD": lambda args: exp.DateAdd( 249 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 250 ), 251 "DATE_DIFF": lambda args: exp.DateDiff( 252 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 253 ), 254 
"DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"), 255 "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"), 256 "DATE_TRUNC": date_trunc_to_time, 257 "ELEMENT_AT": lambda args: exp.Bracket( 258 this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True 259 ), 260 "FROM_HEX": exp.Unhex.from_arg_list, 261 "FROM_UNIXTIME": _build_from_unixtime, 262 "FROM_UTF8": lambda args: exp.Decode( 263 this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8") 264 ), 265 "NOW": exp.CurrentTimestamp.from_arg_list, 266 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 267 this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2) 268 ), 269 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 270 this=seq_get(args, 0), 271 expression=seq_get(args, 1), 272 replacement=seq_get(args, 2) or exp.Literal.string(""), 273 ), 274 "ROW": exp.Struct.from_arg_list, 275 "SEQUENCE": exp.GenerateSeries.from_arg_list, 276 "SET_AGG": exp.ArrayUniqueAgg.from_arg_list, 277 "SPLIT_TO_MAP": exp.StrToMap.from_arg_list, 278 "STRPOS": lambda args: exp.StrPosition( 279 this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2) 280 ), 281 "TO_CHAR": _build_to_char, 282 "TO_UNIXTIME": exp.TimeToUnix.from_arg_list, 283 "TO_UTF8": lambda args: exp.Encode( 284 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 285 ), 286 "MD5": exp.MD5Digest.from_arg_list, 287 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 288 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 289 } 290 291 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 292 FUNCTION_PARSERS.pop("TRIM") 293 294 class Generator(generator.Generator): 295 INTERVAL_ALLOWS_PLURAL_FORM = False 296 JOIN_HINTS = False 297 TABLE_HINTS = False 298 QUERY_HINTS = False 299 IS_BOOL_ALLOWED = False 300 TZ_TO_WITH_TIME_ZONE = True 301 NVL2_SUPPORTED = False 302 STRUCT_DELIMITER = ("(", ")") 303 
LIMIT_ONLY_LITERALS = True 304 SUPPORTS_SINGLE_ARG_CONCAT = False 305 LIKE_PROPERTY_INSIDE_SCHEMA = True 306 MULTI_ARG_DISTINCT = False 307 SUPPORTS_TO_NUMBER = False 308 HEX_FUNC = "TO_HEX" 309 310 PROPERTIES_LOCATION = { 311 **generator.Generator.PROPERTIES_LOCATION, 312 exp.LocationProperty: exp.Properties.Location.UNSUPPORTED, 313 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 314 } 315 316 TYPE_MAPPING = { 317 **generator.Generator.TYPE_MAPPING, 318 exp.DataType.Type.INT: "INTEGER", 319 exp.DataType.Type.FLOAT: "REAL", 320 exp.DataType.Type.BINARY: "VARBINARY", 321 exp.DataType.Type.TEXT: "VARCHAR", 322 exp.DataType.Type.TIMETZ: "TIME", 323 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 324 exp.DataType.Type.STRUCT: "ROW", 325 exp.DataType.Type.DATETIME: "TIMESTAMP", 326 exp.DataType.Type.DATETIME64: "TIMESTAMP", 327 exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG", 328 } 329 330 TRANSFORMS = { 331 **generator.Generator.TRANSFORMS, 332 exp.AnyValue: rename_func("ARBITRARY"), 333 exp.ApproxDistinct: lambda self, e: self.func( 334 "APPROX_DISTINCT", e.this, e.args.get("accuracy") 335 ), 336 exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"), 337 exp.ArgMax: rename_func("MAX_BY"), 338 exp.ArgMin: rename_func("MIN_BY"), 339 exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", 340 exp.ArrayAny: rename_func("ANY_MATCH"), 341 exp.ArrayConcat: rename_func("CONCAT"), 342 exp.ArrayContains: rename_func("CONTAINS"), 343 exp.ArraySize: rename_func("CARDINALITY"), 344 exp.ArrayToString: rename_func("ARRAY_JOIN"), 345 exp.ArrayUniqueAgg: rename_func("SET_AGG"), 346 exp.AtTimeZone: rename_func("AT_TIMEZONE"), 347 exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression), 348 exp.BitwiseLeftShift: lambda self, e: self.func( 349 "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression 350 ), 351 exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this), 352 exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, 
e.expression), 353 exp.BitwiseRightShift: lambda self, e: self.func( 354 "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression 355 ), 356 exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression), 357 exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), 358 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 359 exp.DateAdd: lambda self, e: self.func( 360 "DATE_ADD", 361 unit_to_str(e), 362 _to_int(e.expression), 363 e.this, 364 ), 365 exp.DateDiff: lambda self, e: self.func( 366 "DATE_DIFF", unit_to_str(e), e.expression, e.this 367 ), 368 exp.DateStrToDate: datestrtodate_sql, 369 exp.DateToDi: lambda self, 370 e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)", 371 exp.DateSub: lambda self, e: self.func( 372 "DATE_ADD", 373 unit_to_str(e), 374 _to_int(e.expression * -1), 375 e.this, 376 ), 377 exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), 378 exp.DiToDate: lambda self, 379 e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", 380 exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"), 381 exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'", 382 exp.First: _first_last_sql, 383 exp.FirstValue: _first_last_sql, 384 exp.FromTimeZone: lambda self, 385 e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'", 386 exp.Group: transforms.preprocess([transforms.unalias_group]), 387 exp.GroupConcat: lambda self, e: self.func( 388 "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator") 389 ), 390 exp.If: if_sql(), 391 exp.ILike: no_ilike_sql, 392 exp.Initcap: _initcap_sql, 393 exp.ParseJSON: rename_func("JSON_PARSE"), 394 exp.Last: _first_last_sql, 395 exp.LastValue: _first_last_sql, 396 exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this), 397 exp.Lateral: _explode_to_unnest_sql, 398 exp.Left: left_to_substring_sql, 399 exp.Levenshtein: 
rename_func("LEVENSHTEIN_DISTANCE"), 400 exp.LogicalAnd: rename_func("BOOL_AND"), 401 exp.LogicalOr: rename_func("BOOL_OR"), 402 exp.Pivot: no_pivot_sql, 403 exp.Quantile: _quantile_sql, 404 exp.RegexpExtract: regexp_extract_sql, 405 exp.Right: right_to_substring_sql, 406 exp.SafeDivide: no_safe_divide_sql, 407 exp.Schema: _schema_sql, 408 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 409 exp.Select: transforms.preprocess( 410 [ 411 transforms.eliminate_qualify, 412 transforms.eliminate_distinct_on, 413 transforms.explode_to_unnest(1), 414 transforms.eliminate_semi_and_anti_joins, 415 ] 416 ), 417 exp.SortArray: _no_sort_array, 418 exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True), 419 exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", 420 exp.StrToMap: rename_func("SPLIT_TO_MAP"), 421 exp.StrToTime: _str_to_time_sql, 422 exp.StructExtract: struct_extract_sql, 423 exp.Table: transforms.preprocess([_unnest_sequence]), 424 exp.Timestamp: no_timestamp_sql, 425 exp.TimestampTrunc: timestamptrunc_sql(), 426 exp.TimeStrToDate: timestrtotime_sql, 427 exp.TimeStrToTime: timestrtotime_sql, 428 exp.TimeStrToUnix: lambda self, e: self.func( 429 "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT) 430 ), 431 exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 432 exp.TimeToUnix: rename_func("TO_UNIXTIME"), 433 exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 434 exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), 435 exp.TsOrDiToDi: lambda self, 436 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", 437 exp.TsOrDsAdd: _ts_or_ds_add_sql, 438 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 439 exp.TsOrDsToDate: _ts_or_ds_to_date_sql, 440 exp.Unhex: rename_func("FROM_HEX"), 441 exp.UnixToStr: lambda self, 442 e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), 
{self.format_time(e)})", 443 exp.UnixToTime: _unix_to_time_sql, 444 exp.UnixToTimeStr: lambda self, 445 e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)", 446 exp.VariancePop: rename_func("VAR_POP"), 447 exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]), 448 exp.WithinGroup: transforms.preprocess( 449 [transforms.remove_within_group_for_percentiles] 450 ), 451 exp.Xor: bool_xor_sql, 452 exp.MD5: lambda self, e: self.func( 453 "LOWER", self.func("TO_HEX", self.func("MD5", self.sql(e, "this"))) 454 ), 455 exp.MD5Digest: rename_func("MD5"), 456 exp.SHA: rename_func("SHA1"), 457 exp.SHA2: sha256_sql, 458 } 459 460 RESERVED_KEYWORDS = { 461 "alter", 462 "and", 463 "as", 464 "between", 465 "by", 466 "case", 467 "cast", 468 "constraint", 469 "create", 470 "cross", 471 "current_time", 472 "current_timestamp", 473 "deallocate", 474 "delete", 475 "describe", 476 "distinct", 477 "drop", 478 "else", 479 "end", 480 "escape", 481 "except", 482 "execute", 483 "exists", 484 "extract", 485 "false", 486 "for", 487 "from", 488 "full", 489 "group", 490 "having", 491 "in", 492 "inner", 493 "insert", 494 "intersect", 495 "into", 496 "is", 497 "join", 498 "left", 499 "like", 500 "natural", 501 "not", 502 "null", 503 "on", 504 "or", 505 "order", 506 "outer", 507 "prepare", 508 "right", 509 "select", 510 "table", 511 "then", 512 "true", 513 "union", 514 "using", 515 "values", 516 "when", 517 "where", 518 "with", 519 } 520 521 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 522 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 
523 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 524 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 525 # which seems to be using the same time mapping as Hive, as per: 526 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 527 value_as_text = exp.cast(expression.this, exp.DataType.Type.TEXT) 528 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 529 parse_with_tz = self.func( 530 "PARSE_DATETIME", 531 value_as_text, 532 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 533 ) 534 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 535 return self.func("TO_UNIXTIME", coalesced) 536 537 def bracket_sql(self, expression: exp.Bracket) -> str: 538 if expression.args.get("safe"): 539 return self.func( 540 "ELEMENT_AT", 541 expression.this, 542 seq_get( 543 apply_index_offset( 544 expression.this, 545 expression.expressions, 546 1 - expression.args.get("offset", 0), 547 ), 548 0, 549 ), 550 ) 551 return super().bracket_sql(expression) 552 553 def struct_sql(self, expression: exp.Struct) -> str: 554 from sqlglot.optimizer.annotate_types import annotate_types 555 556 expression = annotate_types(expression) 557 values: t.List[str] = [] 558 schema: t.List[str] = [] 559 unknown_type = False 560 561 for e in expression.expressions: 562 if isinstance(e, exp.PropertyEQ): 563 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 564 unknown_type = True 565 else: 566 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 567 values.append(self.sql(e, "expression")) 568 else: 569 values.append(self.sql(e)) 570 571 size = len(expression.expressions) 572 573 if not size or len(schema) != size: 574 if unknown_type: 575 self.unsupported( 576 "Cannot convert untyped key-value definitions (try annotate_types)." 
577 ) 578 return self.func("ROW", *values) 579 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))" 580 581 def interval_sql(self, expression: exp.Interval) -> str: 582 if expression.this and expression.text("unit").upper().startswith("WEEK"): 583 return f"({expression.this.name} * INTERVAL '7' DAY)" 584 return super().interval_sql(expression) 585 586 def transaction_sql(self, expression: exp.Transaction) -> str: 587 modes = expression.args.get("modes") 588 modes = f" {', '.join(modes)}" if modes else "" 589 return f"START TRANSACTION{modes}" 590 591 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 592 start = expression.args["start"] 593 end = expression.args["end"] 594 step = expression.args.get("step") 595 596 if isinstance(start, exp.Cast): 597 target_type = start.to 598 elif isinstance(end, exp.Cast): 599 target_type = end.to 600 else: 601 target_type = None 602 603 if target_type and target_type.is_type("timestamp"): 604 if target_type is start.to: 605 end = exp.cast(end, target_type) 606 else: 607 start = exp.cast(start, target_type) 608 609 return self.func("SEQUENCE", start, end, step) 610 611 def offset_limit_modifiers( 612 self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit] 613 ) -> t.List[str]: 614 return [ 615 self.sql(expression, "offset"), 616 self.sql(limit), 617 ] 618 619 def create_sql(self, expression: exp.Create) -> str: 620 """ 621 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 622 so we need to remove them 623 """ 624 kind = expression.args["kind"] 625 schema = expression.this 626 if kind == "VIEW" and schema.expressions: 627 expression.this.set("expressions", None) 628 return super().create_sql(expression) 629 630 def delete_sql(self, expression: exp.Delete) -> str: 631 """ 632 Presto only supports DELETE FROM for a single table without an alias, so we need 633 to remove the unnecessary parts. 
If the original DELETE statement contains more 634 than one table to be deleted, we can't safely map it 1-1 to a Presto statement. 635 """ 636 tables = expression.args.get("tables") or [expression.this] 637 if len(tables) > 1: 638 return super().delete_sql(expression) 639 640 table = tables[0] 641 expression.set("this", table) 642 expression.set("tables", None) 643 644 if isinstance(table, exp.Table): 645 table_alias = table.args.get("alias") 646 if table_alias: 647 table_alias.pop() 648 expression = t.cast(exp.Delete, expression.transform(unqualify_columns)) 649 650 return super().delete_sql(expression)
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
Associates this dialect's time formats with their equivalent Python strftime
formats.
Whether the behavior of `a / b` depends on the types of `a` and `b`. False means `a / b` is always float division. True means `a / b` is integer division if both `a` and `b` are integers.
Whether the base comes first in the `LOG` function. Possible values: `True`, `False`, `None` (two arguments are not supported by `LOG`).
Specifies the strategy according to which identifiers should be normalized.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- SUPPORTS_USER_DEFINED_TYPES
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- DATE_PART_MAPPING
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    """Presto-specific tokenizer configuration layered on the base Tokenizer."""

    # Presto supports Unicode string literals written as U&'...' — build one
    # (prefix, quote) pair per base quote style, for both "U&" and "u&".
    UNICODE_STRINGS = [
        (prefix + q, q)
        for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
        for prefix in ("U&", "u&")
    ]

    # Extend the base keyword table with Presto-specific keywords and types.
    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "START": TokenType.BEGIN,  # START TRANSACTION begins a transaction
        "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
        "ROW": TokenType.STRUCT,  # Presto's struct type is spelled ROW
        "IPADDRESS": TokenType.IPADDRESS,
        "IPPREFIX": TokenType.IPPREFIX,
        "TDIGEST": TokenType.TDIGEST,
        "HYPERLOGLOG": TokenType.HLLSKETCH,
    }
    # Presto has no "/*+ ... */" hint comments and no QUALIFY clause.
    KEYWORDS.pop("/*+")
    KEYWORDS.pop("QUALIFY")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    """Presto-specific parser: maps Presto function names onto sqlglot expressions."""

    # In Presto, VALUES is not necessarily followed by a parenthesized tuple.
    VALUES_FOLLOWED_BY_PAREN = False

    # Function-name -> builder table. Each entry turns a parsed argument list
    # into the corresponding sqlglot expression node.
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ARBITRARY": exp.AnyValue.from_arg_list,
        "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
        "APPROX_PERCENTILE": _build_approx_percentile,
        "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
        "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
        "BITWISE_OR": binary_from_function(exp.BitwiseOr),
        "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
        "CARDINALITY": exp.ArraySize.from_arg_list,
        "CONTAINS": exp.ArrayContains.from_arg_list,
        # Presto's argument order is (unit, amount, value); sqlglot stores the
        # value in `this`, hence the reversed seq_get indices.
        "DATE_ADD": lambda args: exp.DateAdd(
            this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
        ),
        "DATE_DIFF": lambda args: exp.DateDiff(
            this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
        ),
        "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
        "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
        "DATE_TRUNC": date_trunc_to_time,
        # ELEMENT_AT is a "safe" (NULL on missing key) 1-based bracket access.
        "ELEMENT_AT": lambda args: exp.Bracket(
            this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
        ),
        "FROM_HEX": exp.Unhex.from_arg_list,
        "FROM_UNIXTIME": _build_from_unixtime,
        "FROM_UTF8": lambda args: exp.Decode(
            this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
        ),
        "NOW": exp.CurrentTimestamp.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
        ),
        # REGEXP_REPLACE's replacement defaults to the empty string when omitted.
        "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            replacement=seq_get(args, 2) or exp.Literal.string(""),
        ),
        "ROW": exp.Struct.from_arg_list,
        "SEQUENCE": exp.GenerateSeries.from_arg_list,
        "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
        "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
        "STRPOS": lambda args: exp.StrPosition(
            this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2)
        ),
        "TO_CHAR": _build_to_char,
        "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
        "TO_UTF8": lambda args: exp.Encode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "MD5": exp.MD5Digest.from_arg_list,
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
    }

    # Presto parses TRIM as a regular function call, not with special syntax.
    FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
    FUNCTION_PARSERS.pop("TRIM")
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_JSON_EXTRACT
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    """Generates Presto SQL from a sqlglot syntax tree."""

    # Presto spells intervals as INTERVAL '2' DAY, never the plural DAYS.
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    IS_BOOL_ALLOWED = False
    # Render time-zone types as "... WITH TIME ZONE" rather than a TZ suffix.
    TZ_TO_WITH_TIME_ZONE = True
    NVL2_SUPPORTED = False
    # Struct (ROW) values are delimited with parentheses, not braces.
    STRUCT_DELIMITER = ("(", ")")
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_SINGLE_ARG_CONCAT = False
    LIKE_PROPERTY_INSIDE_SCHEMA = True
    MULTI_ARG_DISTINCT = False
    SUPPORTS_TO_NUMBER = False
    HEX_FUNC = "TO_HEX"

    # Properties Presto can't express are marked UNSUPPORTED so the generator
    # can warn instead of emitting invalid SQL.
    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # sqlglot type -> Presto type-name overrides.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.INT: "INTEGER",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.BINARY: "VARBINARY",
        exp.DataType.Type.TEXT: "VARCHAR",
        exp.DataType.Type.TIMETZ: "TIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.STRUCT: "ROW",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.DATETIME64: "TIMESTAMP",
        exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
    }

    # Per-expression SQL generation overrides: each entry maps a sqlglot
    # expression class to a callable producing its Presto SQL text.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("ARBITRARY"),
        exp.ApproxDistinct: lambda self, e: self.func(
            "APPROX_DISTINCT", e.this, e.args.get("accuracy")
        ),
        exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
        exp.ArrayAny: rename_func("ANY_MATCH"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayContains: rename_func("CONTAINS"),
        exp.ArraySize: rename_func("CARDINALITY"),
        exp.ArrayToString: rename_func("ARRAY_JOIN"),
        exp.ArrayUniqueAgg: rename_func("SET_AGG"),
        exp.AtTimeZone: rename_func("AT_TIMEZONE"),
        exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
        exp.BitwiseLeftShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
        ),
        exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
        exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
        exp.BitwiseRightShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
        ),
        exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
        exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        # DATE_ADD takes (unit, amount, value); the amount is coerced to int.
        exp.DateAdd: lambda self, e: self.func(
            "DATE_ADD",
            unit_to_str(e),
            _to_int(e.expression),
            e.this,
        ),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", unit_to_str(e), e.expression, e.this
        ),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
        # Presto has no DATE_SUB; negate the amount and use DATE_ADD.
        exp.DateSub: lambda self, e: self.func(
            "DATE_ADD",
            unit_to_str(e),
            _to_int(e.expression * -1),
            e.this,
        ),
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
        exp.DiToDate: lambda self,
        e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
        exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
        exp.First: _first_last_sql,
        exp.FirstValue: _first_last_sql,
        exp.FromTimeZone: lambda self,
        e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.GroupConcat: lambda self, e: self.func(
            "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator")
        ),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.Initcap: _initcap_sql,
        exp.ParseJSON: rename_func("JSON_PARSE"),
        exp.Last: _first_last_sql,
        exp.LastValue: _first_last_sql,
        exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
        exp.Lateral: _explode_to_unnest_sql,
        exp.Left: left_to_substring_sql,
        exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpExtract: regexp_extract_sql,
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.Schema: _schema_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        # SELECT rewrites applied before generation; order matters here.
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.explode_to_unnest(1),
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SortArray: _no_sort_array,
        exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True),
        exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
        exp.StrToMap: rename_func("SPLIT_TO_MAP"),
        exp.StrToTime: _str_to_time_sql,
        exp.StructExtract: struct_extract_sql,
        exp.Table: transforms.preprocess([_unnest_sequence]),
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToDate: timestrtotime_sql,
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
        ),
        exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("TO_UNIXTIME"),
        exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixToStr: lambda self,
        e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self,
        e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
        exp.WithinGroup: transforms.preprocess(
            [transforms.remove_within_group_for_percentiles]
        ),
        exp.Xor: bool_xor_sql,
        exp.MD5: lambda self, e: self.func(
            "LOWER", self.func("TO_HEX", self.func("MD5", self.sql(e, "this")))
        ),
        exp.MD5Digest: rename_func("MD5"),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
    }

    # Identifiers colliding with these keywords must be quoted when generated.
    RESERVED_KEYWORDS = {
        "alter", "and", "as", "between", "by", "case", "cast",
        "constraint", "create", "cross", "current_time", "current_timestamp",
        "deallocate", "delete", "describe", "distinct", "drop", "else", "end",
        "escape", "except", "execute", "exists", "extract", "false", "for",
        "from", "full", "group", "having", "in", "inner", "insert",
        "intersect", "into", "is", "join", "left", "like", "natural", "not",
        "null", "on", "or", "order", "outer", "prepare", "right", "select",
        "table", "then", "true", "union", "using", "values", "when", "where",
        "with",
    }

    def strtounix_sql(self, expression: exp.StrToUnix) -> str:
        """Generate TO_UNIXTIME(...) for a string-to-unix-timestamp conversion."""
        # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
        # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
        # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
        # which seems to be using the same time mapping as Hive, as per:
        # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
        value_as_text = exp.cast(expression.this, exp.DataType.Type.TEXT)
        parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))
        parse_with_tz = self.func(
            "PARSE_DATETIME",
            value_as_text,
            self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
        )
        coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
        return self.func("TO_UNIXTIME", coalesced)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        """Render a bracket access; the "safe" variant becomes ELEMENT_AT."""
        if expression.args.get("safe"):
            return self.func(
                "ELEMENT_AT",
                expression.this,
                # Adjust the stored index to Presto's 1-based convention
                # before rendering the single subscript expression.
                seq_get(
                    apply_index_offset(
                        expression.this,
                        expression.expressions,
                        1 - expression.args.get("offset", 0),
                    ),
                    0,
                ),
            )
        return super().bracket_sql(expression)

    def struct_sql(self, expression: exp.Struct) -> str:
        """Render a struct literal as CAST(ROW(...) AS ROW(...)) when every
        field has a known name and type, otherwise as a bare ROW(...)."""
        from sqlglot.optimizer.annotate_types import annotate_types

        expression = annotate_types(expression)
        values: t.List[str] = []
        schema: t.List[str] = []
        unknown_type = False

        for e in expression.expressions:
            if isinstance(e, exp.PropertyEQ):
                if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                    unknown_type = True
                else:
                    schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                values.append(self.sql(e, "expression"))
            else:
                values.append(self.sql(e))

        size = len(expression.expressions)

        # Fall back to an untyped ROW(...) unless every field contributed a
        # "name type" entry to the schema.
        if not size or len(schema) != size:
            if unknown_type:
                self.unsupported(
                    "Cannot convert untyped key-value definitions (try annotate_types)."
                )
            return self.func("ROW", *values)
        return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

    def interval_sql(self, expression: exp.Interval) -> str:
        # Presto has no WEEK interval unit; express weeks as a multiple of days.
        if expression.this and expression.text("unit").upper().startswith("WEEK"):
            return f"({expression.this.name} * INTERVAL '7' DAY)"
        return super().interval_sql(expression)

    def transaction_sql(self, expression: exp.Transaction) -> str:
        # Presto uses START TRANSACTION (optionally with modes), not BEGIN.
        modes = expression.args.get("modes")
        modes = f" {', '.join(modes)}" if modes else ""
        return f"START TRANSACTION{modes}"

    def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
        """Render GenerateSeries as SEQUENCE(start, end[, step]).

        If one endpoint is cast to TIMESTAMP, cast the other endpoint to the
        same type so SEQUENCE receives matching argument types.
        """
        start = expression.args["start"]
        end = expression.args["end"]
        step = expression.args.get("step")

        if isinstance(start, exp.Cast):
            target_type = start.to
        elif isinstance(end, exp.Cast):
            target_type = end.to
        else:
            target_type = None

        if target_type and target_type.is_type("timestamp"):
            if target_type is start.to:
                end = exp.cast(end, target_type)
            else:
                start = exp.cast(start, target_type)

        return self.func("SEQUENCE", start, end, step)

    def offset_limit_modifiers(
        self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
    ) -> t.List[str]:
        # Presto expects OFFSET before LIMIT, the reverse of the base order.
        return [
            self.sql(expression, "offset"),
            self.sql(limit),
        ]

    def create_sql(self, expression: exp.Create) -> str:
        """
        Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
        so we need to remove them
        """
        kind = expression.args["kind"]
        schema = expression.this
        if kind == "VIEW" and schema.expressions:
            expression.this.set("expressions", None)
        return super().create_sql(expression)

    def delete_sql(self, expression: exp.Delete) -> str:
        """
        Presto only supports DELETE FROM for a single table without an alias, so we need
        to remove the unnecessary parts. If the original DELETE statement contains more
        than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
        """
        tables = expression.args.get("tables") or [expression.this]
        if len(tables) > 1:
            return super().delete_sql(expression)

        table = tables[0]
        expression.set("this", table)
        expression.set("tables", None)

        if isinstance(table, exp.Table):
            # Drop the alias and unqualify columns, since Presto's DELETE
            # accepts only a bare table name.
            table_alias = table.args.get("alias")
            if table_alias:
                table_alias.pop()
                expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

        return super().delete_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
521 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 522 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 523 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 524 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 525 # which seems to be using the same time mapping as Hive, as per: 526 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 527 value_as_text = exp.cast(expression.this, exp.DataType.Type.TEXT) 528 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 529 parse_with_tz = self.func( 530 "PARSE_DATETIME", 531 value_as_text, 532 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 533 ) 534 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 535 return self.func("TO_UNIXTIME", coalesced)
537 def bracket_sql(self, expression: exp.Bracket) -> str: 538 if expression.args.get("safe"): 539 return self.func( 540 "ELEMENT_AT", 541 expression.this, 542 seq_get( 543 apply_index_offset( 544 expression.this, 545 expression.expressions, 546 1 - expression.args.get("offset", 0), 547 ), 548 0, 549 ), 550 ) 551 return super().bracket_sql(expression)
553 def struct_sql(self, expression: exp.Struct) -> str: 554 from sqlglot.optimizer.annotate_types import annotate_types 555 556 expression = annotate_types(expression) 557 values: t.List[str] = [] 558 schema: t.List[str] = [] 559 unknown_type = False 560 561 for e in expression.expressions: 562 if isinstance(e, exp.PropertyEQ): 563 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 564 unknown_type = True 565 else: 566 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 567 values.append(self.sql(e, "expression")) 568 else: 569 values.append(self.sql(e)) 570 571 size = len(expression.expressions) 572 573 if not size or len(schema) != size: 574 if unknown_type: 575 self.unsupported( 576 "Cannot convert untyped key-value definitions (try annotate_types)." 577 ) 578 return self.func("ROW", *values) 579 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"
591 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 592 start = expression.args["start"] 593 end = expression.args["end"] 594 step = expression.args.get("step") 595 596 if isinstance(start, exp.Cast): 597 target_type = start.to 598 elif isinstance(end, exp.Cast): 599 target_type = end.to 600 else: 601 target_type = None 602 603 if target_type and target_type.is_type("timestamp"): 604 if target_type is start.to: 605 end = exp.cast(end, target_type) 606 else: 607 start = exp.cast(start, target_type) 608 609 return self.func("SEQUENCE", start, end, step)
619 def create_sql(self, expression: exp.Create) -> str: 620 """ 621 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 622 so we need to remove them 623 """ 624 kind = expression.args["kind"] 625 schema = expression.this 626 if kind == "VIEW" and schema.expressions: 627 expression.this.set("expressions", None) 628 return super().create_sql(expression)
Presto doesn't support CREATE VIEW with expressions (e.g. in `CREATE VIEW x (cola)`, `(cola)` is the expression), so we need to remove them.
630 def delete_sql(self, expression: exp.Delete) -> str: 631 """ 632 Presto only supports DELETE FROM for a single table without an alias, so we need 633 to remove the unnecessary parts. If the original DELETE statement contains more 634 than one table to be deleted, we can't safely map it 1-1 to a Presto statement. 635 """ 636 tables = expression.args.get("tables") or [expression.this] 637 if len(tables) > 1: 638 return super().delete_sql(expression) 639 640 table = tables[0] 641 expression.set("this", table) 642 expression.set("tables", None) 643 644 if isinstance(table, exp.Table): 645 table_alias = table.args.get("alias") 646 if table_alias: 647 table_alias.pop() 648 expression = t.cast(exp.Delete, expression.transform(unqualify_columns)) 649 650 return super().delete_sql(expression)
Presto only supports DELETE FROM for a single table without an alias, so we need to remove the unnecessary parts. If the original DELETE statement contains more than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_SET_OP
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTED_JSON_PATH_PARTS
- CAN_IMPLEMENT_ARRAY_ANY
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- STAR_EXCEPT
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql