sqlglot.dialects.clickhouse
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    sha256_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType

DATETIME_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]


def _build_date_format(args: t.List) -> exp.TimeToStr:
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("timezone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )


def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If"))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
        )

    return _delta_sql


class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "INT256": TokenType.INT256,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT128": TokenType.UINT128,
            "UINT16": TokenType.USMALLINT,
            "UINT256": TokenType.UINT256,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }

    class Parser(parser.Parser):
        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        AGG_FUNCTIONS = {
            "count",
            "min",
            "max",
            "sum",
            "avg",
            "any",
            "stddevPop",
            "stddevSamp",
            "varPop",
            "varSamp",
            "corr",
            "covarPop",
            "covarSamp",
            "entropy",
            "exponentialMovingAverage",
            "intervalLengthSum",
            "kolmogorovSmirnovTest",
            "mannWhitneyUTest",
            "median",
            "rankCorr",
            "sumKahan",
            "studentTTest",
            "welchTTest",
            "anyHeavy",
            "anyLast",
            "boundingRatio",
            "first_value",
            "last_value",
            "argMin",
            "argMax",
            "avgWeighted",
            "topK",
            "topKWeighted",
            "deltaSum",
            "deltaSumTimestamp",
            "groupArray",
            "groupArrayLast",
            "groupUniqArray",
            "groupArrayInsertAt",
            "groupArrayMovingAvg",
            "groupArrayMovingSum",
            "groupArraySample",
            "groupBitAnd",
            "groupBitOr",
            "groupBitXor",
            "groupBitmap",
            "groupBitmapAnd",
            "groupBitmapOr",
            "groupBitmapXor",
            "sumWithOverflow",
            "sumMap",
            "minMap",
            "maxMap",
            "skewSamp",
            "skewPop",
            "kurtSamp",
            "kurtPop",
            "uniq",
            "uniqExact",
            "uniqCombined",
            "uniqCombined64",
            "uniqHLL12",
            "uniqTheta",
            "quantile",
            "quantiles",
            "quantileExact",
            "quantilesExact",
            "quantileExactLow",
            "quantilesExactLow",
            "quantileExactHigh",
            "quantilesExactHigh",
            "quantileExactWeighted",
            "quantilesExactWeighted",
            "quantileTiming",
            "quantilesTiming",
            "quantileTimingWeighted",
            "quantilesTimingWeighted",
            "quantileDeterministic",
            "quantilesDeterministic",
            "quantileTDigest",
            "quantilesTDigest",
            "quantileTDigestWeighted",
            "quantilesTDigestWeighted",
            "quantileBFloat16",
            "quantilesBFloat16",
            "quantileBFloat16Weighted",
            "quantilesBFloat16Weighted",
            "simpleLinearRegression",
            "stochasticLinearRegression",
            "stochasticLogisticRegression",
            "categoricalInformationValue",
            "contingency",
            "cramersV",
            "cramersVBiasCorrected",
            "theilsU",
            "maxIntersections",
            "maxIntersectionsPosition",
            "meanZTest",
            "quantileInterpolatedWeighted",
            "quantilesInterpolatedWeighted",
            "quantileGK",
            "quantilesGK",
            "sparkBar",
            "sumCount",
            "largestTriangleThreeBuckets",
            "histogram",
            "sequenceMatch",
            "sequenceCount",
            "windowFunnel",
            "retention",
            "uniqUpTo",
            "sequenceNextNode",
            "exponentialTimeDecayedAvg",
        }

        AGG_FUNCTIONS_SUFFIXES = [
            "If",
            "Array",
            "ArrayIf",
            "Map",
            "SimpleState",
            "State",
            "Merge",
            "MergeState",
            "ForEach",
            "Distinct",
            "OrDefault",
            "OrNull",
            "Resample",
            "ArgMin",
            "ArgMax",
        ]

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.SET,
        }

        AGG_FUNC_MAPPING = (
            lambda functions, suffixes: {
                f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
            }
        )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
        }

        FUNCTION_PARSERS.pop("MATCH")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
            and self._parse_in(this, is_global=True),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_placeholder(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            if not self._match(TokenType.L_BRACE):
                return None

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> exp.CTE:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                params = self._parse_func_params(func)

                kwargs = {
                    "this": func.this,
                    "expressions": func.expressions,
                }
                if parts[1]:
                    kwargs["parts"] = parts
                    exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be CH style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()

    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        EXPLICIT_SET_OP = True
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.NULLABLE: "Nullable",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArraySize: rename_func("LENGTH"),
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CountIf: rename_func("countIf"),
            exp.CompressColumnConstraint: lambda self,
            e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self,
            e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.Select: transforms.preprocess([transforms.eliminate_qualify]),
            exp.StartsWith: rename_func("startsWith"),
            exp.StrPosition: lambda self, e: self.func(
                "position", e.this, e.args.get("substr"), e.args.get("position")
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "DATE_FORMAT", e.this, self.format_time(e), e.args.get("timezone")
            ),
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Variance: rename_func("varSamp"),
            exp.Stddev: rename_func("stddevSamp"),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
        }

        # there's no list in docs, but it can be found in Clickhouse code
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                return "String"

            return super().datatype_sql(expression)

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
            params = self.expressions(expression, key="params", flat=True)
            return self.func(expression.name, *expression.expressions) + f"({params})"

        def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
            return self.func(expression.name, *expression.expressions)

        def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
            return self.anonymousaggfunc_sql(expression)

        def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
            return self.parameterizedagg_sql(expression)

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(expression.this, "this")
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}"

            return super().createable_sql(expression, locations)

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
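For orientation, here is a brief, hedged usage sketch of the dialect defined above; the printed outputs are indicative of the type mappings and the parameterized-aggregate syntax, not guaranteed byte-for-byte:

    import sqlglot

    # String-like types (VARCHAR, TEXT, ...) collapse to ClickHouse's String,
    # and any length parameter is dropped (see datatype_sql above).
    print(sqlglot.transpile("SELECT CAST(x AS VARCHAR(10)) FROM t", write="clickhouse")[0])
    # e.g. SELECT CAST(x AS String) FROM t

    # Parameterized aggregates round-trip in ClickHouse's func(params)(args) form.
    print(sqlglot.parse_one("SELECT quantile(0.5)(x) FROM t", read="clickhouse").sql("clickhouse"))
    # e.g. SELECT quantile(0.5)(x) FROM t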
NORMALIZE_FUNCTIONS: Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
NULL_ORDERING: Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
LOG_BASE_FIRST: Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG).
FORCE_EARLY_ALIAS_REF_EXPANSION: Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()). For example:

WITH data AS (
  SELECT
    1 AS id,
    2 AS my_id
)
SELECT
  id AS my_id
FROM data
WHERE my_id = 1
GROUP BY my_id
HAVING my_id = 1

In most dialects "my_id" would refer to "data.my_id" (which is resolved in _qualify_columns()) across the whole query, except:
- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which will forward the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
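A sketch of how this plays out through the optimizer's qualification step (qualify is the public entry point in sqlglot.optimizer.qualify; the query is the example above):

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = """
WITH data AS (SELECT 1 AS id, 2 AS my_id)
SELECT id AS my_id FROM data
WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
"""

# Under ClickHouse semantics the alias should be expanded to its underlying
# expression (id) across WHERE / GROUP BY / HAVING before qualification.
print(qualify(sqlglot.parse_one(sql, read="clickhouse"), dialect="clickhouse").sql("clickhouse"))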
UNESCAPED_SEQUENCES: Mapping of an escaped sequence ("\\n") to its unescaped version ("\n").
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- NORMALIZATION_STRATEGY
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- DATE_PART_MAPPING
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    COMMENTS = ["--", "#", "#!", ("/*", "*/")]
    IDENTIFIERS = ['"', "`"]
    STRING_ESCAPES = ["'", "\\"]
    BIT_STRINGS = [("0b", "")]
    HEX_STRINGS = [("0x", ""), ("0X", "")]
    HEREDOC_STRINGS = ["$"]

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "ATTACH": TokenType.COMMAND,
        "DATE32": TokenType.DATE32,
        "DATETIME64": TokenType.DATETIME64,
        "DICTIONARY": TokenType.DICTIONARY,
        "ENUM8": TokenType.ENUM8,
        "ENUM16": TokenType.ENUM16,
        "FINAL": TokenType.FINAL,
        "FIXEDSTRING": TokenType.FIXEDSTRING,
        "FLOAT32": TokenType.FLOAT,
        "FLOAT64": TokenType.DOUBLE,
        "GLOBAL": TokenType.GLOBAL,
        "INT256": TokenType.INT256,
        "LOWCARDINALITY": TokenType.LOWCARDINALITY,
        "MAP": TokenType.MAP,
        "NESTED": TokenType.NESTED,
        "SAMPLE": TokenType.TABLE_SAMPLE,
        "TUPLE": TokenType.STRUCT,
        "UINT128": TokenType.UINT128,
        "UINT16": TokenType.USMALLINT,
        "UINT256": TokenType.UINT256,
        "UINT32": TokenType.UINT,
        "UINT64": TokenType.UBIGINT,
        "UINT8": TokenType.UTINYINT,
        "IPV4": TokenType.IPV4,
        "IPV6": TokenType.IPV6,
        "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
        "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
        "SYSTEM": TokenType.COMMAND,
        "PREWHERE": TokenType.PREWHERE,
    }
    KEYWORDS.pop("/*+")

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.HEREDOC_STRING,
    }
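For illustration, the tokenizer can be exercised through the top-level sqlglot.tokenize helper; the query below is made up, and shows "#" starting a comment and "0x" introducing a hex string:

import sqlglot

# ClickHouse treats '#' as a comment prefix and '0x...' as a hex literal.
for token in sqlglot.tokenize("SELECT 0xFF AS mask # trailing comment", read="clickhouse"):
    print(token)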
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
    # * select x from t1 union all select x from t2 limit 1;
    # * select x from t1 union all (select x from t2 limit 1);
    MODIFIERS_ATTACHED_TO_SET_OP = False
    INTERVAL_SPANS = False

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ANY": exp.AnyValue.from_arg_list,
        "ARRAYSUM": exp.ArraySum.from_arg_list,
        "COUNTIF": _build_count_if,
        "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
        "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
        "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
        "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
        "DATE_FORMAT": _build_date_format,
        "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
        "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
        "FORMATDATETIME": _build_date_format,
        "JSONEXTRACTSTRING": build_json_extract_path(
            exp.JSONExtractScalar, zero_based_indexing=False
        ),
        "MAP": parser.build_var_map,
        "MATCH": exp.RegexpLike.from_arg_list,
        "RANDCANONICAL": exp.Rand.from_arg_list,
        "TUPLE": exp.Struct.from_arg_list,
        "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
        "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
        "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
        "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
        "UNIQ": exp.ApproxDistinct.from_arg_list,
        "XOR": lambda args: exp.Xor(expressions=args),
        "MD5": exp.MD5Digest.from_arg_list,
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
    }

    AGG_FUNCTIONS = {
        "count",
        "min",
        "max",
        "sum",
        "avg",
        "any",
        "stddevPop",
        "stddevSamp",
        "varPop",
        "varSamp",
        "corr",
        "covarPop",
        "covarSamp",
        "entropy",
        "exponentialMovingAverage",
        "intervalLengthSum",
        "kolmogorovSmirnovTest",
        "mannWhitneyUTest",
        "median",
        "rankCorr",
        "sumKahan",
        "studentTTest",
        "welchTTest",
        "anyHeavy",
        "anyLast",
        "boundingRatio",
        "first_value",
        "last_value",
        "argMin",
        "argMax",
        "avgWeighted",
        "topK",
        "topKWeighted",
        "deltaSum",
        "deltaSumTimestamp",
        "groupArray",
        "groupArrayLast",
        "groupUniqArray",
        "groupArrayInsertAt",
        "groupArrayMovingAvg",
        "groupArrayMovingSum",
        "groupArraySample",
        "groupBitAnd",
        "groupBitOr",
        "groupBitXor",
        "groupBitmap",
        "groupBitmapAnd",
        "groupBitmapOr",
        "groupBitmapXor",
        "sumWithOverflow",
        "sumMap",
        "minMap",
        "maxMap",
        "skewSamp",
        "skewPop",
        "kurtSamp",
        "kurtPop",
        "uniq",
        "uniqExact",
        "uniqCombined",
        "uniqCombined64",
        "uniqHLL12",
        "uniqTheta",
        "quantile",
        "quantiles",
        "quantileExact",
        "quantilesExact",
        "quantileExactLow",
        "quantilesExactLow",
        "quantileExactHigh",
        "quantilesExactHigh",
        "quantileExactWeighted",
        "quantilesExactWeighted",
        "quantileTiming",
        "quantilesTiming",
        "quantileTimingWeighted",
        "quantilesTimingWeighted",
        "quantileDeterministic",
        "quantilesDeterministic",
        "quantileTDigest",
        "quantilesTDigest",
        "quantileTDigestWeighted",
        "quantilesTDigestWeighted",
        "quantileBFloat16",
        "quantilesBFloat16",
        "quantileBFloat16Weighted",
        "quantilesBFloat16Weighted",
        "simpleLinearRegression",
        "stochasticLinearRegression",
        "stochasticLogisticRegression",
        "categoricalInformationValue",
        "contingency",
        "cramersV",
        "cramersVBiasCorrected",
        "theilsU",
        "maxIntersections",
        "maxIntersectionsPosition",
        "meanZTest",
        "quantileInterpolatedWeighted",
        "quantilesInterpolatedWeighted",
        "quantileGK",
        "quantilesGK",
        "sparkBar",
        "sumCount",
        "largestTriangleThreeBuckets",
        "histogram",
        "sequenceMatch",
        "sequenceCount",
        "windowFunnel",
        "retention",
        "uniqUpTo",
        "sequenceNextNode",
        "exponentialTimeDecayedAvg",
    }

    AGG_FUNCTIONS_SUFFIXES = [
        "If",
        "Array",
        "ArrayIf",
        "Map",
        "SimpleState",
        "State",
        "Merge",
        "MergeState",
        "ForEach",
        "Distinct",
        "OrDefault",
        "OrNull",
        "Resample",
        "ArgMin",
        "ArgMax",
    ]

    FUNC_TOKENS = {
        *parser.Parser.FUNC_TOKENS,
        TokenType.SET,
    }

    AGG_FUNC_MAPPING = (
        lambda functions, suffixes: {
            f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
        }
    )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)

    FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
        "QUANTILE": lambda self: self._parse_quantile(),
    }

    FUNCTION_PARSERS.pop("MATCH")

    NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
    NO_PAREN_FUNCTION_PARSERS.pop("ANY")

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
        and self._parse_in(this, is_global=True),
    }

    # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
    # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
    COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
    COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

    JOIN_KINDS = {
        *parser.Parser.JOIN_KINDS,
        TokenType.ANY,
        TokenType.ASOF,
        TokenType.ARRAY,
    }

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
        TokenType.ANY,
        TokenType.ARRAY,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.SETTINGS,
    }

    ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
        TokenType.FORMAT,
    }

    LOG_DEFAULTS_TO_LN = True

    QUERY_MODIFIER_PARSERS = {
        **parser.Parser.QUERY_MODIFIER_PARSERS,
        TokenType.SETTINGS: lambda self: (
            "settings",
            self._advance() or self._parse_csv(self._parse_assignment),
        ),
        TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "INDEX": lambda self: self._parse_index_constraint(),
        "CODEC": lambda self: self._parse_compress(),
    }

    ALTER_PARSERS = {
        **parser.Parser.ALTER_PARSERS,
        "REPLACE": lambda self: self._parse_alter_table_replace(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
        "INDEX",
    }

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = super()._parse_assignment()

        if self._match(TokenType.PLACEHOLDER):
            return self.expression(
                exp.If,
                this=this,
                true=self._parse_assignment(),
                false=self._match(TokenType.COLON) and self._parse_assignment(),
            )

        return this

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """
        Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
        https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
        """
        if not self._match(TokenType.L_BRACE):
            return None

        this = self._parse_id_var()
        self._match(TokenType.COLON)
        kind = self._parse_types(check_func=False, allow_identifiers=False) or (
            self._match_text_seq("IDENTIFIER") and "Identifier"
        )

        if not kind:
            self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
        elif not self._match(TokenType.R_BRACE):
            self.raise_error("Expecting }")

        return self.expression(exp.Placeholder, this=this, kind=kind)

    def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
        this = super()._parse_in(this)
        this.set("is_global", is_global)
        return this

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        this = super()._parse_table(
            schema=schema,
            joins=joins,
            alias_tokens=alias_tokens,
            parse_bracket=parse_bracket,
            is_db_reference=is_db_reference,
        )

        if self._match(TokenType.FINAL):
            this = self.expression(exp.Final, this=this)

        return this

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        return super()._parse_position(haystack_first=True)

    # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
    def _parse_cte(self) -> exp.CTE:
        # WITH <identifier> AS <subquery expression>
        cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

        if not cte:
            # WITH <expression> AS <identifier>
            cte = self.expression(
                exp.CTE,
                this=self._parse_assignment(),
                alias=self._parse_table_alias(),
                scalar=True,
            )

        return cte

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        is_global = self._match(TokenType.GLOBAL) and self._prev
        kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

        if kind_pre:
            kind = self._match_set(self.JOIN_KINDS) and self._prev
            side = self._match_set(self.JOIN_SIDES) and self._prev
            return is_global, side, kind

        return (
            is_global,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
        if join:
            join.set("global", join.args.pop("method", None))

        return join

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        expr = super()._parse_function(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        func = expr.this if isinstance(expr, exp.Window) else expr

        # Aggregate functions can be split in 2 parts: <func_name><suffix>
        parts = (
            self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
        )

        if parts:
            params = self._parse_func_params(func)

            kwargs = {
                "this": func.this,
                "expressions": func.expressions,
            }
            if parts[1]:
                kwargs["parts"] = parts
                exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
            else:
                exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

            kwargs["exp_class"] = exp_class
            if params:
                kwargs["params"] = params

            func = self.expression(**kwargs)

            if isinstance(expr, exp.Window):
                # The window's func was parsed as Anonymous in base parser, fix its
                # type to be CH style CombinedAnonymousAggFunc / AnonymousAggFunc
                expr.set("this", func)
            elif params:
                # Params have blocked super()._parse_function() from parsing the following window
                # (if that exists) as they're standing between the function call and the window spec
                expr = self._parse_window(func)
            else:
                expr = func

        return expr

    def _parse_func_params(
        self, this: t.Optional[exp.Func] = None
    ) -> t.Optional[t.List[exp.Expression]]:
        if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
            return self._parse_csv(self._parse_lambda)

        if self._match(TokenType.L_PAREN):
            params = self._parse_csv(self._parse_lambda)
            self._match_r_paren(this)
            return params

        return None

    def _parse_quantile(self) -> exp.Quantile:
        this = self._parse_lambda()
        params = self._parse_func_params()
        if params:
            return self.expression(exp.Quantile, this=params[0], quantile=this)
        return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return super()._parse_wrapped_id_vars(optional=True)

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        return super()._parse_primary_key(
            wrapped_optional=wrapped_optional or in_props, in_props=in_props
        )

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        index = self._index
        if self._match_text_seq("CLUSTER"):
            this = self._parse_id_var()
            if this:
                return self.expression(exp.OnCluster, this=this)
            else:
                self._retreat(index)
        return None

    def _parse_index_constraint(
        self, kind: t.Optional[str] = None
    ) -> exp.IndexColumnConstraint:
        # INDEX name1 expr TYPE type1(args) GRANULARITY value
        this = self._parse_id_var()
        expression = self._parse_assignment()

        index_type = self._match_text_seq("TYPE") and (
            self._parse_function() or self._parse_var()
        )

        granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

        return self.expression(
            exp.IndexColumnConstraint,
            this=this,
            expression=expression,
            index_type=index_type,
            granularity=granularity,
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
        if not self._match(TokenType.PARTITION):
            return None

        if self._match_text_seq("ID"):
            # Corresponds to the PARTITION ID <string_value> syntax
            expressions: t.List[exp.Expression] = [
                self.expression(exp.PartitionId, this=self._parse_string())
            ]
        else:
            expressions = self._parse_expressions()

        return self.expression(exp.Partition, expressions=expressions)

    def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
        partition = self._parse_partition()

        if not partition or not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
        )

    def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
        if not self._match_text_seq("PROJECTION"):
            return None

        return self.expression(
            exp.ProjectionDef,
            this=self._parse_id_var(),
            expression=self._parse_wrapped(self._parse_statement),
        )

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        return super()._parse_constraint() or self._parse_projection_def()
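A minimal sketch of one ClickHouse-specific parse shown above: query parameters of the form {name: Type} become exp.Placeholder nodes that carry the declared type in their kind arg (the parameter names id and tbl are made up):

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT {id: UInt32} FROM {tbl: Identifier}", read="clickhouse")

# Each placeholder should round-trip through placeholder_sql() unchanged.
for ph in ast.find_all(exp.Placeholder):
    print(ph.sql("clickhouse"))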
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
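These knobs are reachable through the Dialect helpers documented above; a sketch (the invalid query is illustrative):

from sqlglot import Dialect
from sqlglot.errors import ErrorLevel, ParseError

clickhouse = Dialect.get_or_raise("clickhouse")

# Collect up to 3 errors and raise them together instead of failing fast.
parser = clickhouse.parser(error_level=ErrorLevel.RAISE, max_errors=3)

try:
    parser.parse(clickhouse.tokenize("SELECT 1 +"))
except ParseError as e:
    print(e)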
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_JSON_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    QUERY_HINTS = False
    STRUCT_DELIMITER = ("(", ")")
    NVL2_SUPPORTED = False
    TABLESAMPLE_REQUIRES_PARENS = False
    TABLESAMPLE_SIZE_IS_ROWS = False
    TABLESAMPLE_KEYWORDS = "SAMPLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    EXPLICIT_SET_OP = True
    GROUPINGS_SEP = ""
    SET_OP_MODIFIERS = False

    STRING_TYPE_MAPPING = {
        exp.DataType.Type.CHAR: "String",
        exp.DataType.Type.LONGBLOB: "String",
        exp.DataType.Type.LONGTEXT: "String",
        exp.DataType.Type.MEDIUMBLOB: "String",
        exp.DataType.Type.MEDIUMTEXT: "String",
        exp.DataType.Type.TINYBLOB: "String",
        exp.DataType.Type.TINYTEXT: "String",
        exp.DataType.Type.TEXT: "String",
        exp.DataType.Type.VARBINARY: "String",
        exp.DataType.Type.VARCHAR: "String",
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        **STRING_TYPE_MAPPING,
        exp.DataType.Type.ARRAY: "Array",
        exp.DataType.Type.BIGINT: "Int64",
        exp.DataType.Type.DATE32: "Date32",
        exp.DataType.Type.DATETIME64: "DateTime64",
        exp.DataType.Type.DOUBLE: "Float64",
        exp.DataType.Type.ENUM: "Enum",
        exp.DataType.Type.ENUM8: "Enum8",
        exp.DataType.Type.ENUM16: "Enum16",
        exp.DataType.Type.FIXEDSTRING: "FixedString",
        exp.DataType.Type.FLOAT: "Float32",
        exp.DataType.Type.INT: "Int32",
        exp.DataType.Type.MEDIUMINT: "Int32",
        exp.DataType.Type.INT128: "Int128",
        exp.DataType.Type.INT256: "Int256",
        exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
        exp.DataType.Type.MAP: "Map",
        exp.DataType.Type.NESTED: "Nested",
        exp.DataType.Type.NULLABLE: "Nullable",
        exp.DataType.Type.SMALLINT: "Int16",
        exp.DataType.Type.STRUCT: "Tuple",
        exp.DataType.Type.TINYINT: "Int8",
        exp.DataType.Type.UBIGINT: "UInt64",
        exp.DataType.Type.UINT: "UInt32",
        exp.DataType.Type.UINT128: "UInt128",
        exp.DataType.Type.UINT256: "UInt256",
        exp.DataType.Type.USMALLINT: "UInt16",
        exp.DataType.Type.UTINYINT: "UInt8",
        exp.DataType.Type.IPV4: "IPv4",
        exp.DataType.Type.IPV6: "IPv6",
        exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
        exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("any"),
        exp.ApproxDistinct: rename_func("uniq"),
        exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
        exp.ArraySize: rename_func("LENGTH"),
        exp.ArraySum: rename_func("arraySum"),
        exp.ArgMax: arg_max_or_min_no_count("argMax"),
        exp.ArgMin: arg_max_or_min_no_count("argMin"),
        exp.Array: inline_array_sql,
        exp.CastToStrType: rename_func("CAST"),
        exp.CountIf: rename_func("countIf"),
        exp.CompressColumnConstraint: lambda self,
        e: f"CODEC({self.expressions(e, key='this', flat=True)})",
        exp.ComputedColumnConstraint: lambda self,
        e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
        exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
        exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
        exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
        exp.DateSub: _datetime_delta_sql("DATE_SUB"),
        exp.Explode: rename_func("arrayJoin"),
        exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
        exp.IsNan: rename_func("isNaN"),
        exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONPathKey: json_path_key_only_name,
        exp.JSONPathRoot: lambda *_: "",
        exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
        exp.Nullif: rename_func("nullIf"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
        exp.Rand: rename_func("randCanonical"),
        exp.Select: transforms.preprocess([transforms.eliminate_qualify]),
        exp.StartsWith: rename_func("startsWith"),
        exp.StrPosition: lambda self, e: self.func(
            "position", e.this, e.args.get("substr"), e.args.get("position")
        ),
        exp.TimeToStr: lambda self, e: self.func(
            "DATE_FORMAT", e.this, self.format_time(e), e.args.get("timezone")
        ),
        exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
        exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
        exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
        exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
        exp.MD5Digest: rename_func("MD5"),
        exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.UnixToTime: _unix_to_time_sql,
        exp.TimestampTrunc: timestamptrunc_sql(zone=True),
        exp.Variance: rename_func("varSamp"),
        exp.Stddev: rename_func("stddevSamp"),
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.OnCluster: exp.Properties.Location.POST_NAME,
    }

    # there's no list in docs, but it can be found in Clickhouse code
    # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
    ON_CLUSTER_TARGETS = {
        "DATABASE",
        "TABLE",
        "VIEW",
        "DICTIONARY",
        "INDEX",
        "FUNCTION",
        "NAMED COLLECTION",
    }

    def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
        this = self.json_path_part(expression.this)
        return str(int(this) + 1) if is_int(this) else this

    def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
        return f"AS {self.sql(expression, 'this')}"

    def _any_to_has(
        self,
        expression: exp.EQ | exp.NEQ,
        default: t.Callable[[t.Any], str],
        prefix: str = "",
    ) -> str:
        if isinstance(expression.left, exp.Any):
            arr = expression.left
            this = expression.right
        elif isinstance(expression.right, exp.Any):
            arr = expression.right
            this = expression.left
        else:
            return default(expression)

        return prefix + self.func("has", arr.this.unnest(), this)

    def eq_sql(self, expression: exp.EQ) -> str:
        return self._any_to_has(expression, super().eq_sql)

    def neq_sql(self, expression: exp.NEQ) -> str:
        return self._any_to_has(expression, super().neq_sql, "NOT ")

    def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
        # Manually add a flag to make the search case-insensitive
        regex = self.func("CONCAT", "'(?i)'", expression.expression)
        return self.func("match", expression.this, regex)

    def datatype_sql(self, expression: exp.DataType) -> str:
        # String is the standard ClickHouse type, every other variant is just an alias.
        # Additionally, any supplied length parameter will be ignored.
        #
        # https://clickhouse.com/docs/en/sql-reference/data-types/string
        if expression.this in self.STRING_TYPE_MAPPING:
            return "String"

        return super().datatype_sql(expression)

    def cte_sql(self, expression: exp.CTE) -> str:
        if expression.args.get("scalar"):
            this = self.sql(expression, "this")
            alias = self.sql(expression, "alias")
            return f"{this} AS {alias}"

        return super().cte_sql(expression)

    def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
        return super().after_limit_modifiers(expression) + [
            (
                self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                if expression.args.get("settings")
                else ""
            ),
            (
                self.seg("FORMAT ") + self.sql(expression, "format")
                if expression.args.get("format")
                else ""
            ),
        ]

    def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
        params = self.expressions(expression, key="params", flat=True)
        return self.func(expression.name, *expression.expressions) + f"({params})"

    def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
        return self.func(expression.name, *expression.expressions)

    def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
        return self.anonymousaggfunc_sql(expression)

    def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
        return self.parameterizedagg_sql(expression)

    def placeholder_sql(self, expression: exp.Placeholder) -> str:
        return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

    def oncluster_sql(self, expression: exp.OnCluster) -> str:
        return f"ON CLUSTER {self.sql(expression, 'this')}"

    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
            exp.Properties.Location.POST_NAME
        ):
            this_name = self.sql(expression.this, "this")
            this_properties = " ".join(
                [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
            )
            this_schema = self.schema_columns_sql(expression.this)
            return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}"

        return super().createable_sql(expression, locations)

    def prewhere_sql(self, expression: exp.PreWhere) -> str:
        this = self.indent(self.sql(expression, "this"))
        return f"{self.seg('PREWHERE')}{self.sep()}{this}"

    def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
        this = self.sql(expression, "this")
        this = f" {this}" if this else ""
        expr = self.sql(expression, "expression")
        expr = f" {expr}" if expr else ""
        index_type = self.sql(expression, "index_type")
        index_type = f" TYPE {index_type}" if index_type else ""
        granularity = self.sql(expression, "granularity")
        granularity = f" GRANULARITY {granularity}" if granularity else ""

        return f"INDEX{this}{expr}{index_type}{granularity}"

    def partition_sql(self, expression: exp.Partition) -> str:
        return f"PARTITION {self.expressions(expression, flat=True)}"

    def partitionid_sql(self, expression: exp.PartitionId) -> str:
        return f"ID {self.sql(expression.this)}"

    def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
        return (
            f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
        )

    def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
        return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
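These options are forwarded from the top-level API; a minimal sketch (the query is made up):

import sqlglot

# pretty=True formats the output; identify=True quotes every identifier.
print(sqlglot.transpile("SELECT a FROM t WHERE a > 1", write="clickhouse", pretty=True, identify=True)[0])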
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql