sqlglot.dialects.clickhouse
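A minimal usage sketch (not part of the module source): this dialect is exercised through sqlglot's public `transpile`/`parse_one` API under the dialect name "clickhouse". The expected output below follows the Nullable-wrapping behaviour described in `_parse_types` and `datatype_sql` in the source that follows.

    import sqlglot

    # TEXT has no direct ClickHouse equivalent; it maps to String and, since
    # ClickHouse types are non-nullable by default, gets wrapped in Nullable(...)
    sqlglot.transpile("CAST(x AS TEXT)", read="duckdb", write="clickhouse")[0]
    # 'CAST(x AS Nullable(String))'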
from __future__ import annotations

import typing as t
import datetime

from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    sha256_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
    trim_sql,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType

DATEΤΙΜΕ_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]


def _build_date_format(args: t.List) -> exp.TimeToStr:
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("zone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )


def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If"))


def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="STR_TO_DATE", expressions=args)

    strtodate = exp.StrToDate.from_arg_list(args)
    return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATEΤΙΜΕ_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATEΤΙΜΕ_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
        )

    return _delta_sql
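
# A hand-traced sketch of _quantile_sql's output shape (illustrative, not a doctest):
# a scalar level renders as a parameterized call, while an array of levels switches
# to the plural `quantiles` form:
#
#     quantile=0.5          -> quantile(0.5)(x)
#     quantile=[0.25, 0.75] -> quantiles(0.25, 0.75)(x)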

def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime):
    ts = expression.this

    tz = expression.args.get("zone")
    if tz and isinstance(ts, exp.Literal):
        # ClickHouse will not accept timestamps that include a UTC offset, so we must remove them.
        # The first step in removing them is to parse the string with `datetime.datetime.fromisoformat`.
        #
        # In python <3.11, `fromisoformat()` can only parse timestamps of millisecond (3 digit)
        # or microsecond (6 digit) precision. It will error if passed any other number of fractional
        # digits, so we extract the fractional seconds and pad to 6 digits before parsing.
        ts_string = ts.name.strip()

        # separate [date and time] from [fractional seconds and UTC offset]
        ts_parts = ts_string.split(".")
        if len(ts_parts) == 2:
            # separate fractional seconds and UTC offset
            offset_sep = "+" if "+" in ts_parts[1] else "-"
            ts_frac_parts = ts_parts[1].split(offset_sep)
            num_frac_parts = len(ts_frac_parts)

            # pad to 6 digits if fractional seconds present
            ts_frac_parts[0] = ts_frac_parts[0].ljust(6, "0")
            ts_string = "".join(
                [
                    ts_parts[0],  # date and time
                    ".",
                    ts_frac_parts[0],  # fractional seconds
                    offset_sep if num_frac_parts > 1 else "",
                    ts_frac_parts[1] if num_frac_parts > 1 else "",  # utc offset (if present)
                ]
            )

        # return literal with no timezone, e.g. turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14'
        # this is because ClickHouse encodes the timezone as a data type parameter and throws an error if
        # it's part of the timestamp string
        ts_without_tz = (
            datetime.datetime.fromisoformat(ts_string).replace(tzinfo=None).isoformat(sep=" ")
        )
        ts = exp.Literal.string(ts_without_tz)

    # Non-nullable DateTime64 with microsecond precision
    expressions = [exp.DataTypeParam(this=tz)] if tz else []
    datatype = exp.DataType.build(
        exp.DataType.Type.DATETIME64,
        expressions=[exp.DataTypeParam(this=exp.Literal.number(6)), *expressions],
        nullable=False,
    )

    return self.sql(exp.cast(ts, datatype, dialect=self.dialect))
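
# A hand-traced example of the normalization above (illustrative, not a doctest):
# for a zone-aware TimeStrToTime over '2020-01-01 12:13:14.123-08:00', the fractional
# seconds are padded to '123000', `fromisoformat()` absorbs the '-08:00' offset, and
# the result renders roughly as CAST('2020-01-01 12:13:14.123000' AS DateTime64(6, <zone>)).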

class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True

    # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"}

    SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = {
        exp.Except: False,
        exp.Intersect: False,
        exp.Union: None,
    }

    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        IDENTIFIER_ESCAPES = ["\\"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "INT256": TokenType.INT256,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT128": TokenType.UINT128,
            "UINT16": TokenType.USMALLINT,
            "UINT256": TokenType.UINT256,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "POINT": TokenType.POINT,
            "RING": TokenType.RING,
            "LINESTRING": TokenType.LINESTRING,
            "MULTILINESTRING": TokenType.MULTILINESTRING,
            "POLYGON": TokenType.POLYGON,
            "MULTIPOLYGON": TokenType.MULTIPOLYGON,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }

    class Parser(parser.Parser):
        # Tested in ClickHouse's playground; the following two queries seem to do the same thing:
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "STR_TO_DATE": _build_str_to_date,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }
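
        # A small sketch of the COUNTIF dispatch registered above (illustrative, not a
        # doctest): the one-argument form maps to the canonical CountIf expression, while
        # ClickHouse's two-argument form is preserved as a combined aggregate:
        #
        #     countIf(cond)    -> exp.CountIf
        #     countIf(x, cond) -> exp.CombinedAggFunc(this="countIf", parts=("count", "If"))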

        AGG_FUNCTIONS = {
            "count",
            "min",
            "max",
            "sum",
            "avg",
            "any",
            "stddevPop",
            "stddevSamp",
            "varPop",
            "varSamp",
            "corr",
            "covarPop",
            "covarSamp",
            "entropy",
            "exponentialMovingAverage",
            "intervalLengthSum",
            "kolmogorovSmirnovTest",
            "mannWhitneyUTest",
            "median",
            "rankCorr",
            "sumKahan",
            "studentTTest",
            "welchTTest",
            "anyHeavy",
            "anyLast",
            "boundingRatio",
            "first_value",
            "last_value",
            "argMin",
            "argMax",
            "avgWeighted",
            "topK",
            "topKWeighted",
            "deltaSum",
            "deltaSumTimestamp",
            "groupArray",
            "groupArrayLast",
            "groupUniqArray",
            "groupArrayInsertAt",
            "groupArrayMovingAvg",
            "groupArrayMovingSum",
            "groupArraySample",
            "groupBitAnd",
            "groupBitOr",
            "groupBitXor",
            "groupBitmap",
            "groupBitmapAnd",
            "groupBitmapOr",
            "groupBitmapXor",
            "sumWithOverflow",
            "sumMap",
            "minMap",
            "maxMap",
            "skewSamp",
            "skewPop",
            "kurtSamp",
            "kurtPop",
            "uniq",
            "uniqExact",
            "uniqCombined",
            "uniqCombined64",
            "uniqHLL12",
            "uniqTheta",
            "quantile",
            "quantiles",
            "quantileExact",
            "quantilesExact",
            "quantileExactLow",
            "quantilesExactLow",
            "quantileExactHigh",
            "quantilesExactHigh",
            "quantileExactWeighted",
            "quantilesExactWeighted",
            "quantileTiming",
            "quantilesTiming",
            "quantileTimingWeighted",
            "quantilesTimingWeighted",
            "quantileDeterministic",
            "quantilesDeterministic",
            "quantileTDigest",
            "quantilesTDigest",
            "quantileTDigestWeighted",
            "quantilesTDigestWeighted",
            "quantileBFloat16",
            "quantilesBFloat16",
            "quantileBFloat16Weighted",
            "quantilesBFloat16Weighted",
            "simpleLinearRegression",
            "stochasticLinearRegression",
            "stochasticLogisticRegression",
            "categoricalInformationValue",
            "contingency",
            "cramersV",
            "cramersVBiasCorrected",
            "theilsU",
            "maxIntersections",
            "maxIntersectionsPosition",
            "meanZTest",
            "quantileInterpolatedWeighted",
            "quantilesInterpolatedWeighted",
            "quantileGK",
            "quantilesGK",
            "sparkBar",
            "sumCount",
            "largestTriangleThreeBuckets",
            "histogram",
            "sequenceMatch",
            "sequenceCount",
            "windowFunnel",
            "retention",
            "uniqUpTo",
            "sequenceNextNode",
            "exponentialTimeDecayedAvg",
        }

        AGG_FUNCTIONS_SUFFIXES = [
            "If",
            "Array",
            "ArrayIf",
            "Map",
            "SimpleState",
            "State",
            "Merge",
            "MergeState",
            "ForEach",
            "Distinct",
            "OrDefault",
            "OrNull",
            "Resample",
            "ArgMin",
            "ArgMax",
        ]

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.SET,
        }

        RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.LIKE,
        }

        AGG_FUNC_MAPPING = (
            lambda functions, suffixes: {
                f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
            }
        )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
            "COLUMNS": lambda self: self._parse_columns(),
        }

        FUNCTION_PARSERS.pop("MATCH")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy()
        NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
            and self._parse_in(this, is_global=True),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect ClickHouse (it corresponds to
        # the Postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)
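
        # A minimal sketch of the ternary parsing this enables (hand-checked against
        # _parse_assignment below; illustrative, not a doctest):
        #
        #     import sqlglot
        #     sqlglot.parse_one("x > 1 ? 'a' : 'b'", read="clickhouse")
        #     # -> exp.If(this=x > 1, true='a', false='b')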

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
        }

        # https://clickhouse.com/docs/en/sql-reference/statements/create/function
        def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
            return self._parse_lambda()

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            dtype = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True:
                # Mark every type as non-nullable, which is ClickHouse's default, unless it's
                # already marked as nullable. This marker helps us transpile types from other
                # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))`
                # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would
                # fail in ClickHouse without the `Nullable` type constructor.
                dtype.set("nullable", False)

            return dtype

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            return self.expression(exp.Placeholder, this=this, kind=kind)
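
        # A round-trip sketch of query parameters (illustrative, not a doctest):
        #
        #     import sqlglot
        #     sqlglot.parse_one("SELECT {abc: UInt32}", read="clickhouse").sql("clickhouse")
        #     # -> "SELECT {abc: UInt32}"
        #
        # The braces are parsed into exp.Placeholder(this="abc", kind=UInt32) here and
        # re-rendered by Generator.placeholder_sql below.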

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> exp.CTE:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte
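
        # A sketch of the scalar CTE branch above (illustrative, not a doctest):
        #
        #     import sqlglot
        #     sqlglot.parse_one("WITH 1 AS x SELECT x", read="clickhouse").sql("clickhouse")
        #     # -> "WITH 1 AS x SELECT x"
        #
        # The "WITH <expression> AS <identifier>" form is tagged scalar=True so that
        # Generator.cte_sql can render it back without wrapping it in parentheses.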

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

                # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
                # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
                if join.kind == "ARRAY":
                    for table in join.find_all(exp.Table):
                        table.replace(table.to_column())

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
                params = self._parse_func_params(anon_func)

                kwargs = {
                    "this": anon_func.this,
                    "expressions": anon_func.expressions,
                }
                if parts[1]:
                    kwargs["parts"] = parts
                    exp_class: t.Type[exp.Expression] = (
                        exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                    )
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in the base parser; fix its
                    # type to be a ClickHouse-style CombinedAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if one exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None
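
        # An illustrative trace of the combinator handling above (hand-checked against
        # AGG_FUNC_MAPPING; not a doctest):
        #
        #     AGG_FUNC_MAPPING["uniqIf"]  # -> ("uniq", "If")
        #     AGG_FUNC_MAPPING["topK"]    # -> ("topK", "")
        #
        # so `uniqIf(x, cond)` parses into a CombinedAggFunc, `topK(10)(x)` into a
        # ParameterizedAgg, and `quantileIf(0.5)(x, cond)` into a CombinedParameterizedAgg.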

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()

        def _parse_alias(
            self, this: t.Optional[exp.Expression], explicit: bool = False
        ) -> t.Optional[exp.Expression]:
            # In ClickHouse "SELECT <expr> APPLY(...)" is a query modifier, so "APPLY"
            # shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias
            if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False):
                return this

            return super()._parse_alias(this=this, explicit=explicit)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            this = super()._parse_expression()

            # ClickHouse allows the "SELECT <expr> [APPLY(func)] [...]" modifier
            while self._match_pair(TokenType.APPLY, TokenType.L_PAREN):
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
                self._match(TokenType.R_PAREN)

            return this
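
        # A sketch of the APPLY handling above (illustrative, not a doctest):
        #
        #     import sqlglot
        #     sqlglot.parse_one("SELECT * APPLY(sum) FROM t", read="clickhouse")
        #     # -> the projection is wrapped as exp.Apply(this=Star, expression=Var(sum))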

        def _parse_columns(self) -> exp.Expression:
            this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda())

            while self._next and self._match_text_seq(")", "APPLY", "("):
                self._match(TokenType.R_PAREN)
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
            return this

    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        VALUES_AS_TABLE = False

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BOOLEAN: "Bool",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME: "DateTime",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DECIMAL: "Decimal",
            exp.DataType.Type.DECIMAL32: "Decimal32",
            exp.DataType.Type.DECIMAL64: "Decimal64",
            exp.DataType.Type.DECIMAL128: "Decimal128",
            exp.DataType.Type.DECIMAL256: "Decimal256",
            exp.DataType.Type.TIMESTAMP: "DateTime",
            exp.DataType.Type.TIMESTAMPTZ: "DateTime",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
"UInt8", 910 exp.DataType.Type.IPV4: "IPv4", 911 exp.DataType.Type.IPV6: "IPv6", 912 exp.DataType.Type.POINT: "Point", 913 exp.DataType.Type.RING: "Ring", 914 exp.DataType.Type.LINESTRING: "LineString", 915 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 916 exp.DataType.Type.POLYGON: "Polygon", 917 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 918 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 919 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 920 } 921 922 TRANSFORMS = { 923 **generator.Generator.TRANSFORMS, 924 exp.AnyValue: rename_func("any"), 925 exp.ApproxDistinct: rename_func("uniq"), 926 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 927 exp.ArraySize: rename_func("LENGTH"), 928 exp.ArraySum: rename_func("arraySum"), 929 exp.ArgMax: arg_max_or_min_no_count("argMax"), 930 exp.ArgMin: arg_max_or_min_no_count("argMin"), 931 exp.Array: inline_array_sql, 932 exp.CastToStrType: rename_func("CAST"), 933 exp.CountIf: rename_func("countIf"), 934 exp.CompressColumnConstraint: lambda self, 935 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 936 exp.ComputedColumnConstraint: lambda self, 937 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 938 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 939 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 940 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 941 exp.DateStrToDate: rename_func("toDate"), 942 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 943 exp.Explode: rename_func("arrayJoin"), 944 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 945 exp.IsNan: rename_func("isNaN"), 946 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 947 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 948 exp.JSONPathKey: json_path_key_only_name, 949 exp.JSONPathRoot: lambda *_: "", 950 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 951 exp.Nullif: rename_func("nullIf"), 952 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 953 exp.Pivot: no_pivot_sql, 954 exp.Quantile: _quantile_sql, 955 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 956 exp.Rand: rename_func("randCanonical"), 957 exp.StartsWith: rename_func("startsWith"), 958 exp.StrPosition: lambda self, e: self.func( 959 "position", e.this, e.args.get("substr"), e.args.get("position") 960 ), 961 exp.TimeToStr: lambda self, e: self.func( 962 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 963 ), 964 exp.TimeStrToTime: _timestrtotime_sql, 965 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 966 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 967 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 968 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 969 exp.MD5Digest: rename_func("MD5"), 970 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 971 exp.SHA: rename_func("SHA1"), 972 exp.SHA2: sha256_sql, 973 exp.UnixToTime: _unix_to_time_sql, 974 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 975 exp.Trim: trim_sql, 976 exp.Variance: rename_func("varSamp"), 977 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 978 exp.Stddev: rename_func("stddevSamp"), 979 exp.Chr: rename_func("CHAR"), 980 exp.Lag: lambda self, e: self.func( 981 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 982 ), 983 exp.Lead: 

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.ToTableProperty: exp.Properties.Location.POST_NAME,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # There's no list in the docs, but it can be found in ClickHouse code;
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
        NON_NULLABLE_TYPES = {
            exp.DataType.Type.ARRAY,
            exp.DataType.Type.MAP,
            exp.DataType.Type.STRUCT,
            exp.DataType.Type.POINT,
            exp.DataType.Type.RING,
            exp.DataType.Type.LINESTRING,
            exp.DataType.Type.MULTILINESTRING,
            exp.DataType.Type.POLYGON,
            exp.DataType.Type.MULTIPOLYGON,
        }

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            strtodate_sql = self.function_fallback_sql(expression)

            if not isinstance(expression.parent, exp.Cast):
                # StrToDate returns DATEs in other dialects (e.g. Postgres), so
                # this branch aims to improve the transpilation to ClickHouse
                return f"CAST({strtodate_sql} AS DATE)"

            return strtodate_sql

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            dtype = expression.to
            if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
                # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
                dtype.set("nullable", True)

            return super().cast_sql(expression)
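
        # A transpilation sketch of the TRY_CAST mapping above (illustrative, not a
        # doctest; exact output may vary across sqlglot versions):
        #
        #     import sqlglot
        #     sqlglot.transpile("TRY_CAST(x AS TEXT)", read="duckdb", write="clickhouse")[0]
        #     # -> "CAST(x AS Nullable(String))"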

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                dtype = "String"
            else:
                dtype = super().datatype_sql(expression)

            # This section changes the type to `Nullable(...)` if the following conditions hold:
            # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
            #   and change their semantics
            # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
            #   constraint: "Type of Map key must be a type, that can be represented by integer or
            #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
            # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
            parent = expression.parent
            nullable = expression.args.get("nullable")
            if nullable is True or (
                nullable is None
                and not (
                    isinstance(parent, exp.DataType)
                    and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                    and expression.index in (None, 0)
                )
                and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
            ):
                dtype = f"Nullable({dtype})"

            return dtype

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]
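
        # A round-trip sketch of the SETTINGS/FORMAT modifiers emitted above
        # (illustrative, not a doctest):
        #
        #     import sqlglot
        #     sql = "SELECT * FROM t SETTINGS max_threads = 1 FORMAT JSONEachRow"
        #     sqlglot.parse_one(sql, read="clickhouse").sql("clickhouse")
        #     # -> the same query; both modifiers are re-attached after any LIMIT clause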
f"{self.sep()}{this_schema}" if this_schema else "" 1169 1170 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1171 1172 return super().createable_sql(expression, locations) 1173 1174 def create_sql(self, expression: exp.Create) -> str: 1175 # The comment property comes last in CTAS statements, i.e. after the query 1176 query = expression.expression 1177 if isinstance(query, exp.Query): 1178 comment_prop = expression.find(exp.SchemaCommentProperty) 1179 if comment_prop: 1180 comment_prop.pop() 1181 query.replace(exp.paren(query)) 1182 else: 1183 comment_prop = None 1184 1185 create_sql = super().create_sql(expression) 1186 1187 comment_sql = self.sql(comment_prop) 1188 comment_sql = f" {comment_sql}" if comment_sql else "" 1189 1190 return f"{create_sql}{comment_sql}" 1191 1192 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1193 this = self.indent(self.sql(expression, "this")) 1194 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1195 1196 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1197 this = self.sql(expression, "this") 1198 this = f" {this}" if this else "" 1199 expr = self.sql(expression, "expression") 1200 expr = f" {expr}" if expr else "" 1201 index_type = self.sql(expression, "index_type") 1202 index_type = f" TYPE {index_type}" if index_type else "" 1203 granularity = self.sql(expression, "granularity") 1204 granularity = f" GRANULARITY {granularity}" if granularity else "" 1205 1206 return f"INDEX{this}{expr}{index_type}{granularity}" 1207 1208 def partition_sql(self, expression: exp.Partition) -> str: 1209 return f"PARTITION {self.expressions(expression, flat=True)}" 1210 1211 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1212 return f"ID {self.sql(expression.this)}" 1213 1214 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1215 return ( 1216 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1217 ) 1218 1219 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1220 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1221 1222 def is_sql(self, expression: exp.Is) -> str: 1223 is_sql = super().is_sql(expression) 1224 1225 if isinstance(expression.parent, exp.Not) and isinstance(expression.this, exp.Subquery): 1226 # WHERE (SELECT ...) IS NOT NULL -> NOT ((SELECT ...) IS NULL) 1227 is_sql = self.wrap(is_sql) 1228 1229 return is_sql
"UInt8", 911 exp.DataType.Type.IPV4: "IPv4", 912 exp.DataType.Type.IPV6: "IPv6", 913 exp.DataType.Type.POINT: "Point", 914 exp.DataType.Type.RING: "Ring", 915 exp.DataType.Type.LINESTRING: "LineString", 916 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 917 exp.DataType.Type.POLYGON: "Polygon", 918 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 919 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 920 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 921 } 922 923 TRANSFORMS = { 924 **generator.Generator.TRANSFORMS, 925 exp.AnyValue: rename_func("any"), 926 exp.ApproxDistinct: rename_func("uniq"), 927 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 928 exp.ArraySize: rename_func("LENGTH"), 929 exp.ArraySum: rename_func("arraySum"), 930 exp.ArgMax: arg_max_or_min_no_count("argMax"), 931 exp.ArgMin: arg_max_or_min_no_count("argMin"), 932 exp.Array: inline_array_sql, 933 exp.CastToStrType: rename_func("CAST"), 934 exp.CountIf: rename_func("countIf"), 935 exp.CompressColumnConstraint: lambda self, 936 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 937 exp.ComputedColumnConstraint: lambda self, 938 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 939 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 940 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 941 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 942 exp.DateStrToDate: rename_func("toDate"), 943 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 944 exp.Explode: rename_func("arrayJoin"), 945 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 946 exp.IsNan: rename_func("isNaN"), 947 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 948 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 949 exp.JSONPathKey: json_path_key_only_name, 950 exp.JSONPathRoot: lambda *_: "", 951 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 952 exp.Nullif: rename_func("nullIf"), 953 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 954 exp.Pivot: no_pivot_sql, 955 exp.Quantile: _quantile_sql, 956 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 957 exp.Rand: rename_func("randCanonical"), 958 exp.StartsWith: rename_func("startsWith"), 959 exp.StrPosition: lambda self, e: self.func( 960 "position", e.this, e.args.get("substr"), e.args.get("position") 961 ), 962 exp.TimeToStr: lambda self, e: self.func( 963 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 964 ), 965 exp.TimeStrToTime: _timestrtotime_sql, 966 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 967 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 968 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 969 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 970 exp.MD5Digest: rename_func("MD5"), 971 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 972 exp.SHA: rename_func("SHA1"), 973 exp.SHA2: sha256_sql, 974 exp.UnixToTime: _unix_to_time_sql, 975 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 976 exp.Trim: trim_sql, 977 exp.Variance: rename_func("varSamp"), 978 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 979 exp.Stddev: rename_func("stddevSamp"), 980 exp.Chr: rename_func("CHAR"), 981 exp.Lag: lambda self, e: self.func( 982 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 983 ), 984 exp.Lead: 
lambda self, e: self.func( 985 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 986 ), 987 } 988 989 PROPERTIES_LOCATION = { 990 **generator.Generator.PROPERTIES_LOCATION, 991 exp.OnCluster: exp.Properties.Location.POST_NAME, 992 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 993 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 994 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 995 } 996 997 # There's no list in docs, but it can be found in Clickhouse code 998 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 999 ON_CLUSTER_TARGETS = { 1000 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1001 "DATABASE", 1002 "TABLE", 1003 "VIEW", 1004 "DICTIONARY", 1005 "INDEX", 1006 "FUNCTION", 1007 "NAMED COLLECTION", 1008 } 1009 1010 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1011 NON_NULLABLE_TYPES = { 1012 exp.DataType.Type.ARRAY, 1013 exp.DataType.Type.MAP, 1014 exp.DataType.Type.STRUCT, 1015 exp.DataType.Type.POINT, 1016 exp.DataType.Type.RING, 1017 exp.DataType.Type.LINESTRING, 1018 exp.DataType.Type.MULTILINESTRING, 1019 exp.DataType.Type.POLYGON, 1020 exp.DataType.Type.MULTIPOLYGON, 1021 } 1022 1023 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1024 strtodate_sql = self.function_fallback_sql(expression) 1025 1026 if not isinstance(expression.parent, exp.Cast): 1027 # StrToDate returns DATEs in other dialects (eg. postgres), so 1028 # this branch aims to improve the transpilation to clickhouse 1029 return f"CAST({strtodate_sql} AS DATE)" 1030 1031 return strtodate_sql 1032 1033 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1034 this = expression.this 1035 1036 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1037 return self.sql(this) 1038 1039 return super().cast_sql(expression, safe_prefix=safe_prefix) 1040 1041 def trycast_sql(self, expression: exp.TryCast) -> str: 1042 dtype = expression.to 1043 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1044 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1045 dtype.set("nullable", True) 1046 1047 return super().cast_sql(expression) 1048 1049 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1050 this = self.json_path_part(expression.this) 1051 return str(int(this) + 1) if is_int(this) else this 1052 1053 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1054 return f"AS {self.sql(expression, 'this')}" 1055 1056 def _any_to_has( 1057 self, 1058 expression: exp.EQ | exp.NEQ, 1059 default: t.Callable[[t.Any], str], 1060 prefix: str = "", 1061 ) -> str: 1062 if isinstance(expression.left, exp.Any): 1063 arr = expression.left 1064 this = expression.right 1065 elif isinstance(expression.right, exp.Any): 1066 arr = expression.right 1067 this = expression.left 1068 else: 1069 return default(expression) 1070 1071 return prefix + self.func("has", arr.this.unnest(), this) 1072 1073 def eq_sql(self, expression: exp.EQ) -> str: 1074 return self._any_to_has(expression, super().eq_sql) 1075 1076 def neq_sql(self, expression: exp.NEQ) -> str: 1077 return self._any_to_has(expression, super().neq_sql, "NOT ") 1078 1079 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1080 # Manually add a flag to make the search case-insensitive 1081 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1082 return self.func("match", expression.this, regex) 1083 1084 def datatype_sql(self, 
expression: exp.DataType) -> str: 1085 # String is the standard ClickHouse type, every other variant is just an alias. 1086 # Additionally, any supplied length parameter will be ignored. 1087 # 1088 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1089 if expression.this in self.STRING_TYPE_MAPPING: 1090 dtype = "String" 1091 else: 1092 dtype = super().datatype_sql(expression) 1093 1094 # This section changes the type to `Nullable(...)` if the following conditions hold: 1095 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1096 # and change their semantics 1097 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1098 # constraint: "Type of Map key must be a type, that can be represented by integer or 1099 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1100 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1101 parent = expression.parent 1102 nullable = expression.args.get("nullable") 1103 if nullable is True or ( 1104 nullable is None 1105 and not ( 1106 isinstance(parent, exp.DataType) 1107 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1108 and expression.index in (None, 0) 1109 ) 1110 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1111 ): 1112 dtype = f"Nullable({dtype})" 1113 1114 return dtype 1115 1116 def cte_sql(self, expression: exp.CTE) -> str: 1117 if expression.args.get("scalar"): 1118 this = self.sql(expression, "this") 1119 alias = self.sql(expression, "alias") 1120 return f"{this} AS {alias}" 1121 1122 return super().cte_sql(expression) 1123 1124 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1125 return super().after_limit_modifiers(expression) + [ 1126 ( 1127 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1128 if expression.args.get("settings") 1129 else "" 1130 ), 1131 ( 1132 self.seg("FORMAT ") + self.sql(expression, "format") 1133 if expression.args.get("format") 1134 else "" 1135 ), 1136 ] 1137 1138 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1139 params = self.expressions(expression, key="params", flat=True) 1140 return self.func(expression.name, *expression.expressions) + f"({params})" 1141 1142 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1143 return self.func(expression.name, *expression.expressions) 1144 1145 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1146 return self.anonymousaggfunc_sql(expression) 1147 1148 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1149 return self.parameterizedagg_sql(expression) 1150 1151 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1152 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1153 1154 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1155 return f"ON CLUSTER {self.sql(expression, 'this')}" 1156 1157 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1158 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1159 exp.Properties.Location.POST_NAME 1160 ): 1161 this_name = self.sql( 1162 expression.this if isinstance(expression.this, exp.Schema) else expression, 1163 "this", 1164 ) 1165 this_properties = " ".join( 1166 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1167 ) 1168 this_schema = self.schema_columns_sql(expression.this) 1169 this_schema = 
f"{self.sep()}{this_schema}" if this_schema else "" 1170 1171 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1172 1173 return super().createable_sql(expression, locations) 1174 1175 def create_sql(self, expression: exp.Create) -> str: 1176 # The comment property comes last in CTAS statements, i.e. after the query 1177 query = expression.expression 1178 if isinstance(query, exp.Query): 1179 comment_prop = expression.find(exp.SchemaCommentProperty) 1180 if comment_prop: 1181 comment_prop.pop() 1182 query.replace(exp.paren(query)) 1183 else: 1184 comment_prop = None 1185 1186 create_sql = super().create_sql(expression) 1187 1188 comment_sql = self.sql(comment_prop) 1189 comment_sql = f" {comment_sql}" if comment_sql else "" 1190 1191 return f"{create_sql}{comment_sql}" 1192 1193 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1194 this = self.indent(self.sql(expression, "this")) 1195 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1196 1197 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1198 this = self.sql(expression, "this") 1199 this = f" {this}" if this else "" 1200 expr = self.sql(expression, "expression") 1201 expr = f" {expr}" if expr else "" 1202 index_type = self.sql(expression, "index_type") 1203 index_type = f" TYPE {index_type}" if index_type else "" 1204 granularity = self.sql(expression, "granularity") 1205 granularity = f" GRANULARITY {granularity}" if granularity else "" 1206 1207 return f"INDEX{this}{expr}{index_type}{granularity}" 1208 1209 def partition_sql(self, expression: exp.Partition) -> str: 1210 return f"PARTITION {self.expressions(expression, flat=True)}" 1211 1212 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1213 return f"ID {self.sql(expression.this)}" 1214 1215 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1216 return ( 1217 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1218 ) 1219 1220 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1221 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1222 1223 def is_sql(self, expression: exp.Is) -> str: 1224 is_sql = super().is_sql(expression) 1225 1226 if isinstance(expression.parent, exp.Not) and isinstance(expression.this, exp.Subquery): 1227 # WHERE (SELECT ...) IS NOT NULL -> NOT ((SELECT ...) IS NULL) 1228 is_sql = self.wrap(is_sql) 1229 1230 return is_sql
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
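As a quick illustration, both of these defaults can be inspected directly, and the effect of disabled function-name normalization shows up in a round trip (a sketch using the public sqlglot API; the attribute values are the ones set in this module):

    import sqlglot
    from sqlglot.dialects.clickhouse import ClickHouse

    # Defaults set by this module: function names are left as written,
    # and NULLs sort last unless an explicit ordering is given.
    print(ClickHouse.NORMALIZE_FUNCTIONS)  # False
    print(ClickHouse.NULL_ORDERING)        # nulls_are_last

    # With normalization disabled, mixed-case names survive a round trip.
    print(sqlglot.transpile("SELECT startsWith(x, 'a') FROM t", read="clickhouse", write="clickhouse")[0])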
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
    WITH data AS (
        SELECT 1 AS id, 2 AS my_id
    )
    SELECT id AS my_id
    FROM data
    WHERE my_id = 1
    GROUP BY my_id
    HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which will forward the alias to GROUP BY + HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- Clickhouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
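This can be observed with sqlglot's qualifier; the following is a sketch, assuming the optimizer's qualify entry point is used as below:

    from sqlglot import parse_one
    from sqlglot.optimizer.qualify import qualify

    sql = (
        "WITH data AS (SELECT 1 AS id, 2 AS my_id) "
        "SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1"
    )
    # Under ClickHouse, alias references expand before qualification, so my_id
    # in WHERE / GROUP BY / HAVING should resolve to the underlying id column.
    print(qualify(parse_one(sql, read="clickhouse"), dialect="clickhouse").sql("clickhouse"))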
Specifies the strategy according to which identifiers should be normalized.
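For instance, a minimal sketch: because the strategy is CASE_SENSITIVE, normalizing an identifier under this dialect should be a no-op with respect to casing:

    from sqlglot import exp
    from sqlglot.dialects.clickhouse import ClickHouse

    # Identifier casing is preserved; other strategies would lower/uppercase it.
    print(ClickHouse().normalize_identifier(exp.to_identifier("MyTable")).name)  # MyTable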
Mapping of an escaped sequence (\\n) to its unescaped version (\n).
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
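For example, a sketch (the outputs assume the mapping is applied symmetrically at parse and generation time):

    import sqlglot

    # ClickHouse spells the generic SCHEMA creatable as DATABASE, so the
    # keyword should swap when transpiling to/from a CREATE SCHEMA dialect.
    print(sqlglot.transpile("CREATE DATABASE foo", read="clickhouse", write="duckdb")[0])
    print(sqlglot.transpile("CREATE SCHEMA foo", read="duckdb", write="clickhouse")[0])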
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
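As a sketch of the consequence for transpilation (the expectation follows from exp.Union mapping to None above):

    import sqlglot

    # ClickHouse requires DISTINCT or ALL to be spelled out on UNION, so a bare
    # UNION from another dialect should gain an explicit DISTINCT when generated.
    print(sqlglot.transpile("SELECT 1 UNION SELECT 2", read="postgres", write="clickhouse")[0])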
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
184 class Tokenizer(tokens.Tokenizer): 185 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 186 IDENTIFIERS = ['"', "`"] 187 IDENTIFIER_ESCAPES = ["\\"] 188 STRING_ESCAPES = ["'", "\\"] 189 BIT_STRINGS = [("0b", "")] 190 HEX_STRINGS = [("0x", ""), ("0X", "")] 191 HEREDOC_STRINGS = ["$"] 192 193 KEYWORDS = { 194 **tokens.Tokenizer.KEYWORDS, 195 "ATTACH": TokenType.COMMAND, 196 "DATE32": TokenType.DATE32, 197 "DATETIME64": TokenType.DATETIME64, 198 "DICTIONARY": TokenType.DICTIONARY, 199 "ENUM8": TokenType.ENUM8, 200 "ENUM16": TokenType.ENUM16, 201 "FINAL": TokenType.FINAL, 202 "FIXEDSTRING": TokenType.FIXEDSTRING, 203 "FLOAT32": TokenType.FLOAT, 204 "FLOAT64": TokenType.DOUBLE, 205 "GLOBAL": TokenType.GLOBAL, 206 "INT256": TokenType.INT256, 207 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 208 "MAP": TokenType.MAP, 209 "NESTED": TokenType.NESTED, 210 "SAMPLE": TokenType.TABLE_SAMPLE, 211 "TUPLE": TokenType.STRUCT, 212 "UINT128": TokenType.UINT128, 213 "UINT16": TokenType.USMALLINT, 214 "UINT256": TokenType.UINT256, 215 "UINT32": TokenType.UINT, 216 "UINT64": TokenType.UBIGINT, 217 "UINT8": TokenType.UTINYINT, 218 "IPV4": TokenType.IPV4, 219 "IPV6": TokenType.IPV6, 220 "POINT": TokenType.POINT, 221 "RING": TokenType.RING, 222 "LINESTRING": TokenType.LINESTRING, 223 "MULTILINESTRING": TokenType.MULTILINESTRING, 224 "POLYGON": TokenType.POLYGON, 225 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 226 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 227 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 228 "SYSTEM": TokenType.COMMAND, 229 "PREWHERE": TokenType.PREWHERE, 230 } 231 KEYWORDS.pop("/*+") 232 233 SINGLE_TOKENS = { 234 **tokens.Tokenizer.SINGLE_TOKENS, 235 "$": TokenType.HEREDOC_STRING, 236 }
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
238 class Parser(parser.Parser): 239 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 240 # * select x from t1 union all select x from t2 limit 1; 241 # * select x from t1 union all (select x from t2 limit 1); 242 MODIFIERS_ATTACHED_TO_SET_OP = False 243 INTERVAL_SPANS = False 244 245 FUNCTIONS = { 246 **parser.Parser.FUNCTIONS, 247 "ANY": exp.AnyValue.from_arg_list, 248 "ARRAYSUM": exp.ArraySum.from_arg_list, 249 "COUNTIF": _build_count_if, 250 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 251 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 252 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 253 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 254 "DATE_FORMAT": _build_date_format, 255 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 256 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 257 "FORMATDATETIME": _build_date_format, 258 "JSONEXTRACTSTRING": build_json_extract_path( 259 exp.JSONExtractScalar, zero_based_indexing=False 260 ), 261 "MAP": parser.build_var_map, 262 "MATCH": exp.RegexpLike.from_arg_list, 263 "RANDCANONICAL": exp.Rand.from_arg_list, 264 "STR_TO_DATE": _build_str_to_date, 265 "TUPLE": exp.Struct.from_arg_list, 266 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 267 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 268 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 269 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 270 "UNIQ": exp.ApproxDistinct.from_arg_list, 271 "XOR": lambda args: exp.Xor(expressions=args), 272 "MD5": exp.MD5Digest.from_arg_list, 273 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 274 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 275 } 276 277 AGG_FUNCTIONS = { 278 "count", 279 "min", 280 "max", 281 "sum", 282 "avg", 283 "any", 284 "stddevPop", 285 "stddevSamp", 286 "varPop", 287 "varSamp", 288 "corr", 289 "covarPop", 290 "covarSamp", 291 "entropy", 292 "exponentialMovingAverage", 293 "intervalLengthSum", 294 "kolmogorovSmirnovTest", 295 "mannWhitneyUTest", 296 "median", 297 "rankCorr", 298 "sumKahan", 299 "studentTTest", 300 "welchTTest", 301 "anyHeavy", 302 "anyLast", 303 "boundingRatio", 304 "first_value", 305 "last_value", 306 "argMin", 307 "argMax", 308 "avgWeighted", 309 "topK", 310 "topKWeighted", 311 "deltaSum", 312 "deltaSumTimestamp", 313 "groupArray", 314 "groupArrayLast", 315 "groupUniqArray", 316 "groupArrayInsertAt", 317 "groupArrayMovingAvg", 318 "groupArrayMovingSum", 319 "groupArraySample", 320 "groupBitAnd", 321 "groupBitOr", 322 "groupBitXor", 323 "groupBitmap", 324 "groupBitmapAnd", 325 "groupBitmapOr", 326 "groupBitmapXor", 327 "sumWithOverflow", 328 "sumMap", 329 "minMap", 330 "maxMap", 331 "skewSamp", 332 "skewPop", 333 "kurtSamp", 334 "kurtPop", 335 "uniq", 336 "uniqExact", 337 "uniqCombined", 338 "uniqCombined64", 339 "uniqHLL12", 340 "uniqTheta", 341 "quantile", 342 "quantiles", 343 "quantileExact", 344 "quantilesExact", 345 "quantileExactLow", 346 "quantilesExactLow", 347 "quantileExactHigh", 348 "quantilesExactHigh", 349 "quantileExactWeighted", 350 "quantilesExactWeighted", 351 "quantileTiming", 352 "quantilesTiming", 353 "quantileTimingWeighted", 354 "quantilesTimingWeighted", 355 "quantileDeterministic", 356 "quantilesDeterministic", 357 "quantileTDigest", 358 "quantilesTDigest", 359 "quantileTDigestWeighted", 360 
"quantilesTDigestWeighted", 361 "quantileBFloat16", 362 "quantilesBFloat16", 363 "quantileBFloat16Weighted", 364 "quantilesBFloat16Weighted", 365 "simpleLinearRegression", 366 "stochasticLinearRegression", 367 "stochasticLogisticRegression", 368 "categoricalInformationValue", 369 "contingency", 370 "cramersV", 371 "cramersVBiasCorrected", 372 "theilsU", 373 "maxIntersections", 374 "maxIntersectionsPosition", 375 "meanZTest", 376 "quantileInterpolatedWeighted", 377 "quantilesInterpolatedWeighted", 378 "quantileGK", 379 "quantilesGK", 380 "sparkBar", 381 "sumCount", 382 "largestTriangleThreeBuckets", 383 "histogram", 384 "sequenceMatch", 385 "sequenceCount", 386 "windowFunnel", 387 "retention", 388 "uniqUpTo", 389 "sequenceNextNode", 390 "exponentialTimeDecayedAvg", 391 } 392 393 AGG_FUNCTIONS_SUFFIXES = [ 394 "If", 395 "Array", 396 "ArrayIf", 397 "Map", 398 "SimpleState", 399 "State", 400 "Merge", 401 "MergeState", 402 "ForEach", 403 "Distinct", 404 "OrDefault", 405 "OrNull", 406 "Resample", 407 "ArgMin", 408 "ArgMax", 409 ] 410 411 FUNC_TOKENS = { 412 *parser.Parser.FUNC_TOKENS, 413 TokenType.SET, 414 } 415 416 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 417 418 ID_VAR_TOKENS = { 419 *parser.Parser.ID_VAR_TOKENS, 420 TokenType.LIKE, 421 } 422 423 AGG_FUNC_MAPPING = ( 424 lambda functions, suffixes: { 425 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 426 } 427 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 428 429 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 430 431 FUNCTION_PARSERS = { 432 **parser.Parser.FUNCTION_PARSERS, 433 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 434 "QUANTILE": lambda self: self._parse_quantile(), 435 "COLUMNS": lambda self: self._parse_columns(), 436 } 437 438 FUNCTION_PARSERS.pop("MATCH") 439 440 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 441 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 442 443 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 444 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 445 446 RANGE_PARSERS = { 447 **parser.Parser.RANGE_PARSERS, 448 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 449 and self._parse_in(this, is_global=True), 450 } 451 452 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 453 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 
454 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 455 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 456 457 JOIN_KINDS = { 458 *parser.Parser.JOIN_KINDS, 459 TokenType.ANY, 460 TokenType.ASOF, 461 TokenType.ARRAY, 462 } 463 464 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 465 TokenType.ANY, 466 TokenType.ARRAY, 467 TokenType.FINAL, 468 TokenType.FORMAT, 469 TokenType.SETTINGS, 470 } 471 472 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 473 TokenType.FORMAT, 474 } 475 476 LOG_DEFAULTS_TO_LN = True 477 478 QUERY_MODIFIER_PARSERS = { 479 **parser.Parser.QUERY_MODIFIER_PARSERS, 480 TokenType.SETTINGS: lambda self: ( 481 "settings", 482 self._advance() or self._parse_csv(self._parse_assignment), 483 ), 484 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 485 } 486 487 CONSTRAINT_PARSERS = { 488 **parser.Parser.CONSTRAINT_PARSERS, 489 "INDEX": lambda self: self._parse_index_constraint(), 490 "CODEC": lambda self: self._parse_compress(), 491 } 492 493 ALTER_PARSERS = { 494 **parser.Parser.ALTER_PARSERS, 495 "REPLACE": lambda self: self._parse_alter_table_replace(), 496 } 497 498 SCHEMA_UNNAMED_CONSTRAINTS = { 499 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 500 "INDEX", 501 } 502 503 PLACEHOLDER_PARSERS = { 504 **parser.Parser.PLACEHOLDER_PARSERS, 505 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 506 } 507 508 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 509 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 510 return self._parse_lambda() 511 512 def _parse_types( 513 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 514 ) -> t.Optional[exp.Expression]: 515 dtype = super()._parse_types( 516 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 517 ) 518 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 519 # Mark every type as non-nullable which is ClickHouse's default, unless it's 520 # already marked as nullable. This marker helps us transpile types from other 521 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 522 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 523 # fail in ClickHouse without the `Nullable` type constructor. 524 dtype.set("nullable", False) 525 526 return dtype 527 528 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 529 index = self._index 530 this = self._parse_bitwise() 531 if self._match(TokenType.FROM): 532 self._retreat(index) 533 return super()._parse_extract() 534 535 # We return Anonymous here because extract and regexpExtract have different semantics, 536 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 537 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 538 # 539 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 
540 self._match(TokenType.COMMA) 541 return self.expression( 542 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 543 ) 544 545 def _parse_assignment(self) -> t.Optional[exp.Expression]: 546 this = super()._parse_assignment() 547 548 if self._match(TokenType.PLACEHOLDER): 549 return self.expression( 550 exp.If, 551 this=this, 552 true=self._parse_assignment(), 553 false=self._match(TokenType.COLON) and self._parse_assignment(), 554 ) 555 556 return this 557 558 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 559 """ 560 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 561 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 562 """ 563 this = self._parse_id_var() 564 self._match(TokenType.COLON) 565 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 566 self._match_text_seq("IDENTIFIER") and "Identifier" 567 ) 568 569 if not kind: 570 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 571 elif not self._match(TokenType.R_BRACE): 572 self.raise_error("Expecting }") 573 574 return self.expression(exp.Placeholder, this=this, kind=kind) 575 576 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 577 this = super()._parse_in(this) 578 this.set("is_global", is_global) 579 return this 580 581 def _parse_table( 582 self, 583 schema: bool = False, 584 joins: bool = False, 585 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 586 parse_bracket: bool = False, 587 is_db_reference: bool = False, 588 parse_partition: bool = False, 589 ) -> t.Optional[exp.Expression]: 590 this = super()._parse_table( 591 schema=schema, 592 joins=joins, 593 alias_tokens=alias_tokens, 594 parse_bracket=parse_bracket, 595 is_db_reference=is_db_reference, 596 ) 597 598 if self._match(TokenType.FINAL): 599 this = self.expression(exp.Final, this=this) 600 601 return this 602 603 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 604 return super()._parse_position(haystack_first=True) 605 606 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 607 def _parse_cte(self) -> exp.CTE: 608 # WITH <identifier> AS <subquery expression> 609 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 610 611 if not cte: 612 # WITH <expression> AS <identifier> 613 cte = self.expression( 614 exp.CTE, 615 this=self._parse_assignment(), 616 alias=self._parse_table_alias(), 617 scalar=True, 618 ) 619 620 return cte 621 622 def _parse_join_parts( 623 self, 624 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 625 is_global = self._match(TokenType.GLOBAL) and self._prev 626 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 627 628 if kind_pre: 629 kind = self._match_set(self.JOIN_KINDS) and self._prev 630 side = self._match_set(self.JOIN_SIDES) and self._prev 631 return is_global, side, kind 632 633 return ( 634 is_global, 635 self._match_set(self.JOIN_SIDES) and self._prev, 636 self._match_set(self.JOIN_KINDS) and self._prev, 637 ) 638 639 def _parse_join( 640 self, skip_join_token: bool = False, parse_bracket: bool = False 641 ) -> t.Optional[exp.Join]: 642 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 643 if join: 644 join.set("global", join.args.pop("method", None)) 645 646 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 647 # 
https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 648 if join.kind == "ARRAY": 649 for table in join.find_all(exp.Table): 650 table.replace(table.to_column()) 651 652 return join 653 654 def _parse_function( 655 self, 656 functions: t.Optional[t.Dict[str, t.Callable]] = None, 657 anonymous: bool = False, 658 optional_parens: bool = True, 659 any_token: bool = False, 660 ) -> t.Optional[exp.Expression]: 661 expr = super()._parse_function( 662 functions=functions, 663 anonymous=anonymous, 664 optional_parens=optional_parens, 665 any_token=any_token, 666 ) 667 668 func = expr.this if isinstance(expr, exp.Window) else expr 669 670 # Aggregate functions can be split in 2 parts: <func_name><suffix> 671 parts = ( 672 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 673 ) 674 675 if parts: 676 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 677 params = self._parse_func_params(anon_func) 678 679 kwargs = { 680 "this": anon_func.this, 681 "expressions": anon_func.expressions, 682 } 683 if parts[1]: 684 kwargs["parts"] = parts 685 exp_class: t.Type[exp.Expression] = ( 686 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 687 ) 688 else: 689 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 690 691 kwargs["exp_class"] = exp_class 692 if params: 693 kwargs["params"] = params 694 695 func = self.expression(**kwargs) 696 697 if isinstance(expr, exp.Window): 698 # The window's func was parsed as Anonymous in base parser, fix its 699 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 700 expr.set("this", func) 701 elif params: 702 # Params have blocked super()._parse_function() from parsing the following window 703 # (if that exists) as they're standing between the function call and the window spec 704 expr = self._parse_window(func) 705 else: 706 expr = func 707 708 return expr 709 710 def _parse_func_params( 711 self, this: t.Optional[exp.Func] = None 712 ) -> t.Optional[t.List[exp.Expression]]: 713 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 714 return self._parse_csv(self._parse_lambda) 715 716 if self._match(TokenType.L_PAREN): 717 params = self._parse_csv(self._parse_lambda) 718 self._match_r_paren(this) 719 return params 720 721 return None 722 723 def _parse_quantile(self) -> exp.Quantile: 724 this = self._parse_lambda() 725 params = self._parse_func_params() 726 if params: 727 return self.expression(exp.Quantile, this=params[0], quantile=this) 728 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 729 730 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 731 return super()._parse_wrapped_id_vars(optional=True) 732 733 def _parse_primary_key( 734 self, wrapped_optional: bool = False, in_props: bool = False 735 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 736 return super()._parse_primary_key( 737 wrapped_optional=wrapped_optional or in_props, in_props=in_props 738 ) 739 740 def _parse_on_property(self) -> t.Optional[exp.Expression]: 741 index = self._index 742 if self._match_text_seq("CLUSTER"): 743 this = self._parse_id_var() 744 if this: 745 return self.expression(exp.OnCluster, this=this) 746 else: 747 self._retreat(index) 748 return None 749 750 def _parse_index_constraint( 751 self, kind: t.Optional[str] = None 752 ) -> exp.IndexColumnConstraint: 753 # INDEX name1 expr TYPE type1(args) GRANULARITY value 754 this = self._parse_id_var() 755 expression = self._parse_assignment() 756 757 
index_type = self._match_text_seq("TYPE") and ( 758 self._parse_function() or self._parse_var() 759 ) 760 761 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 762 763 return self.expression( 764 exp.IndexColumnConstraint, 765 this=this, 766 expression=expression, 767 index_type=index_type, 768 granularity=granularity, 769 ) 770 771 def _parse_partition(self) -> t.Optional[exp.Partition]: 772 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 773 if not self._match(TokenType.PARTITION): 774 return None 775 776 if self._match_text_seq("ID"): 777 # Corresponds to the PARTITION ID <string_value> syntax 778 expressions: t.List[exp.Expression] = [ 779 self.expression(exp.PartitionId, this=self._parse_string()) 780 ] 781 else: 782 expressions = self._parse_expressions() 783 784 return self.expression(exp.Partition, expressions=expressions) 785 786 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 787 partition = self._parse_partition() 788 789 if not partition or not self._match(TokenType.FROM): 790 return None 791 792 return self.expression( 793 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 794 ) 795 796 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 797 if not self._match_text_seq("PROJECTION"): 798 return None 799 800 return self.expression( 801 exp.ProjectionDef, 802 this=self._parse_id_var(), 803 expression=self._parse_wrapped(self._parse_statement), 804 ) 805 806 def _parse_constraint(self) -> t.Optional[exp.Expression]: 807 return super()._parse_constraint() or self._parse_projection_def() 808 809 def _parse_alias( 810 self, this: t.Optional[exp.Expression], explicit: bool = False 811 ) -> t.Optional[exp.Expression]: 812 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 813 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 814 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 815 return this 816 817 return super()._parse_alias(this=this, explicit=explicit) 818 819 def _parse_expression(self) -> t.Optional[exp.Expression]: 820 this = super()._parse_expression() 821 822 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 823 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 824 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 825 self._match(TokenType.R_PAREN) 826 827 return this 828 829 def _parse_columns(self) -> exp.Expression: 830 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 831 832 while self._next and self._match_text_seq(")", "APPLY", "("): 833 self._match(TokenType.R_PAREN) 834 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 835 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
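A usage sketch exercising this dialect's parser through parse_one, using the query-parameter syntax handled by _parse_query_parameter above:

    from sqlglot import parse_one

    # {name: Type} placeholders parse into exp.Placeholder nodes; 'Identifier'
    # is accepted as the kind for table references.
    ast = parse_one("SELECT {val: UInt32} FROM {tbl: Identifier}", read="clickhouse")
    print(ast.sql("clickhouse"))  # round-trips through placeholder_sql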
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
837 class Generator(generator.Generator): 838 QUERY_HINTS = False 839 STRUCT_DELIMITER = ("(", ")") 840 NVL2_SUPPORTED = False 841 TABLESAMPLE_REQUIRES_PARENS = False 842 TABLESAMPLE_SIZE_IS_ROWS = False 843 TABLESAMPLE_KEYWORDS = "SAMPLE" 844 LAST_DAY_SUPPORTS_DATE_PART = False 845 CAN_IMPLEMENT_ARRAY_ANY = True 846 SUPPORTS_TO_NUMBER = False 847 JOIN_HINTS = False 848 TABLE_HINTS = False 849 GROUPINGS_SEP = "" 850 SET_OP_MODIFIERS = False 851 SUPPORTS_TABLE_ALIAS_COLUMNS = False 852 VALUES_AS_TABLE = False 853 854 STRING_TYPE_MAPPING = { 855 exp.DataType.Type.CHAR: "String", 856 exp.DataType.Type.LONGBLOB: "String", 857 exp.DataType.Type.LONGTEXT: "String", 858 exp.DataType.Type.MEDIUMBLOB: "String", 859 exp.DataType.Type.MEDIUMTEXT: "String", 860 exp.DataType.Type.TINYBLOB: "String", 861 exp.DataType.Type.TINYTEXT: "String", 862 exp.DataType.Type.TEXT: "String", 863 exp.DataType.Type.VARBINARY: "String", 864 exp.DataType.Type.VARCHAR: "String", 865 } 866 867 SUPPORTED_JSON_PATH_PARTS = { 868 exp.JSONPathKey, 869 exp.JSONPathRoot, 870 exp.JSONPathSubscript, 871 } 872 873 TYPE_MAPPING = { 874 **generator.Generator.TYPE_MAPPING, 875 **STRING_TYPE_MAPPING, 876 exp.DataType.Type.ARRAY: "Array", 877 exp.DataType.Type.BOOLEAN: "Bool", 878 exp.DataType.Type.BIGINT: "Int64", 879 exp.DataType.Type.DATE32: "Date32", 880 exp.DataType.Type.DATETIME: "DateTime", 881 exp.DataType.Type.DATETIME64: "DateTime64", 882 exp.DataType.Type.DECIMAL: "Decimal", 883 exp.DataType.Type.DECIMAL32: "Decimal32", 884 exp.DataType.Type.DECIMAL64: "Decimal64", 885 exp.DataType.Type.DECIMAL128: "Decimal128", 886 exp.DataType.Type.DECIMAL256: "Decimal256", 887 exp.DataType.Type.TIMESTAMP: "DateTime", 888 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 889 exp.DataType.Type.DOUBLE: "Float64", 890 exp.DataType.Type.ENUM: "Enum", 891 exp.DataType.Type.ENUM8: "Enum8", 892 exp.DataType.Type.ENUM16: "Enum16", 893 exp.DataType.Type.FIXEDSTRING: "FixedString", 894 exp.DataType.Type.FLOAT: "Float32", 895 exp.DataType.Type.INT: "Int32", 896 exp.DataType.Type.MEDIUMINT: "Int32", 897 exp.DataType.Type.INT128: "Int128", 898 exp.DataType.Type.INT256: "Int256", 899 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 900 exp.DataType.Type.MAP: "Map", 901 exp.DataType.Type.NESTED: "Nested", 902 exp.DataType.Type.SMALLINT: "Int16", 903 exp.DataType.Type.STRUCT: "Tuple", 904 exp.DataType.Type.TINYINT: "Int8", 905 exp.DataType.Type.UBIGINT: "UInt64", 906 exp.DataType.Type.UINT: "UInt32", 907 exp.DataType.Type.UINT128: "UInt128", 908 exp.DataType.Type.UINT256: "UInt256", 909 exp.DataType.Type.USMALLINT: "UInt16", 910 exp.DataType.Type.UTINYINT: "UInt8", 911 exp.DataType.Type.IPV4: "IPv4", 912 exp.DataType.Type.IPV6: "IPv6", 913 exp.DataType.Type.POINT: "Point", 914 exp.DataType.Type.RING: "Ring", 915 exp.DataType.Type.LINESTRING: "LineString", 916 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 917 exp.DataType.Type.POLYGON: "Polygon", 918 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 919 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 920 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 921 } 922 923 TRANSFORMS = { 924 **generator.Generator.TRANSFORMS, 925 exp.AnyValue: rename_func("any"), 926 exp.ApproxDistinct: rename_func("uniq"), 927 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 928 exp.ArraySize: rename_func("LENGTH"), 929 exp.ArraySum: rename_func("arraySum"), 930 exp.ArgMax: arg_max_or_min_no_count("argMax"), 931 exp.ArgMin: arg_max_or_min_no_count("argMin"), 932 
exp.Array: inline_array_sql, 933 exp.CastToStrType: rename_func("CAST"), 934 exp.CountIf: rename_func("countIf"), 935 exp.CompressColumnConstraint: lambda self, 936 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 937 exp.ComputedColumnConstraint: lambda self, 938 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 939 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 940 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 941 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 942 exp.DateStrToDate: rename_func("toDate"), 943 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 944 exp.Explode: rename_func("arrayJoin"), 945 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 946 exp.IsNan: rename_func("isNaN"), 947 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 948 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 949 exp.JSONPathKey: json_path_key_only_name, 950 exp.JSONPathRoot: lambda *_: "", 951 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 952 exp.Nullif: rename_func("nullIf"), 953 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 954 exp.Pivot: no_pivot_sql, 955 exp.Quantile: _quantile_sql, 956 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 957 exp.Rand: rename_func("randCanonical"), 958 exp.StartsWith: rename_func("startsWith"), 959 exp.StrPosition: lambda self, e: self.func( 960 "position", e.this, e.args.get("substr"), e.args.get("position") 961 ), 962 exp.TimeToStr: lambda self, e: self.func( 963 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 964 ), 965 exp.TimeStrToTime: _timestrtotime_sql, 966 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 967 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 968 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 969 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 970 exp.MD5Digest: rename_func("MD5"), 971 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 972 exp.SHA: rename_func("SHA1"), 973 exp.SHA2: sha256_sql, 974 exp.UnixToTime: _unix_to_time_sql, 975 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 976 exp.Trim: trim_sql, 977 exp.Variance: rename_func("varSamp"), 978 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 979 exp.Stddev: rename_func("stddevSamp"), 980 exp.Chr: rename_func("CHAR"), 981 exp.Lag: lambda self, e: self.func( 982 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 983 ), 984 exp.Lead: lambda self, e: self.func( 985 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 986 ), 987 } 988 989 PROPERTIES_LOCATION = { 990 **generator.Generator.PROPERTIES_LOCATION, 991 exp.OnCluster: exp.Properties.Location.POST_NAME, 992 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 993 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 994 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 995 } 996 997 # There's no list in docs, but it can be found in Clickhouse code 998 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 999 ON_CLUSTER_TARGETS = { 1000 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1001 "DATABASE", 1002 "TABLE", 1003 "VIEW", 1004 "DICTIONARY", 1005 "INDEX", 1006 "FUNCTION", 1007 "NAMED COLLECTION", 1008 } 1009 1010 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1011 NON_NULLABLE_TYPES = { 1012 
        exp.DataType.Type.ARRAY,
        exp.DataType.Type.MAP,
        exp.DataType.Type.STRUCT,
        exp.DataType.Type.POINT,
        exp.DataType.Type.RING,
        exp.DataType.Type.LINESTRING,
        exp.DataType.Type.MULTILINESTRING,
        exp.DataType.Type.POLYGON,
        exp.DataType.Type.MULTIPOLYGON,
    }

    def strtodate_sql(self, expression: exp.StrToDate) -> str:
        strtodate_sql = self.function_fallback_sql(expression)

        if not isinstance(expression.parent, exp.Cast):
            # StrToDate returns DATEs in other dialects (eg. postgres), so
            # this branch aims to improve the transpilation to clickhouse
            return f"CAST({strtodate_sql} AS DATE)"

        return strtodate_sql

    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        this = expression.this

        if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
            return self.sql(this)

        return super().cast_sql(expression, safe_prefix=safe_prefix)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        dtype = expression.to
        if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
            # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
            dtype.set("nullable", True)

        return super().cast_sql(expression)

    def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
        this = self.json_path_part(expression.this)
        return str(int(this) + 1) if is_int(this) else this

    def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
        return f"AS {self.sql(expression, 'this')}"

    def _any_to_has(
        self,
        expression: exp.EQ | exp.NEQ,
        default: t.Callable[[t.Any], str],
        prefix: str = "",
    ) -> str:
        if isinstance(expression.left, exp.Any):
            arr = expression.left
            this = expression.right
        elif isinstance(expression.right, exp.Any):
            arr = expression.right
            this = expression.left
        else:
            return default(expression)

        return prefix + self.func("has", arr.this.unnest(), this)

    def eq_sql(self, expression: exp.EQ) -> str:
        return self._any_to_has(expression, super().eq_sql)

    def neq_sql(self, expression: exp.NEQ) -> str:
        return self._any_to_has(expression, super().neq_sql, "NOT ")

    def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
        # Manually add a flag to make the search case-insensitive
        regex = self.func("CONCAT", "'(?i)'", expression.expression)
        return self.func("match", expression.this, regex)

    def datatype_sql(self, expression: exp.DataType) -> str:
        # String is the standard ClickHouse type, every other variant is just an alias.
        # Additionally, any supplied length parameter will be ignored.
        #
        # https://clickhouse.com/docs/en/sql-reference/data-types/string
        if expression.this in self.STRING_TYPE_MAPPING:
            dtype = "String"
        else:
            dtype = super().datatype_sql(expression)

        # This section changes the type to `Nullable(...)` if the following conditions hold:
        # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
        #   and change their semantics
        # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
        #   constraint: "Type of Map key must be a type, that can be represented by integer or
        #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
        # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
        parent = expression.parent
        nullable = expression.args.get("nullable")
        if nullable is True or (
            nullable is None
            and not (
                isinstance(parent, exp.DataType)
                and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                and expression.index in (None, 0)
            )
            and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
        ):
            dtype = f"Nullable({dtype})"

        return dtype

    def cte_sql(self, expression: exp.CTE) -> str:
        if expression.args.get("scalar"):
            this = self.sql(expression, "this")
            alias = self.sql(expression, "alias")
            return f"{this} AS {alias}"

        return super().cte_sql(expression)

    def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
        return super().after_limit_modifiers(expression) + [
            (
                self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                if expression.args.get("settings")
                else ""
            ),
            (
                self.seg("FORMAT ") + self.sql(expression, "format")
                if expression.args.get("format")
                else ""
            ),
        ]

    def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
        params = self.expressions(expression, key="params", flat=True)
        return self.func(expression.name, *expression.expressions) + f"({params})"

    def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
        return self.func(expression.name, *expression.expressions)

    def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
        return self.anonymousaggfunc_sql(expression)

    def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
        return self.parameterizedagg_sql(expression)

    def placeholder_sql(self, expression: exp.Placeholder) -> str:
        return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

    def oncluster_sql(self, expression: exp.OnCluster) -> str:
        return f"ON CLUSTER {self.sql(expression, 'this')}"

    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
            exp.Properties.Location.POST_NAME
        ):
            this_name = self.sql(
                expression.this if isinstance(expression.this, exp.Schema) else expression,
                "this",
            )
            this_properties = " ".join(
                [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
            )
            this_schema = self.schema_columns_sql(expression.this)
            this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

            return f"{this_name}{self.sep()}{this_properties}{this_schema}"

        return super().createable_sql(expression, locations)

    def create_sql(self, expression: exp.Create) -> str:
        # The comment property comes last in CTAS statements, i.e. after the query
        query = expression.expression
        if isinstance(query, exp.Query):
            comment_prop = expression.find(exp.SchemaCommentProperty)
            if comment_prop:
                comment_prop.pop()
                query.replace(exp.paren(query))
        else:
            comment_prop = None

        create_sql = super().create_sql(expression)

        comment_sql = self.sql(comment_prop)
        comment_sql = f" {comment_sql}" if comment_sql else ""

        return f"{create_sql}{comment_sql}"

    def prewhere_sql(self, expression: exp.PreWhere) -> str:
        this = self.indent(self.sql(expression, "this"))
        return f"{self.seg('PREWHERE')}{self.sep()}{this}"

    def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
        this = self.sql(expression, "this")
        this = f" {this}" if this else ""
        expr = self.sql(expression, "expression")
        expr = f" {expr}" if expr else ""
        index_type = self.sql(expression, "index_type")
        index_type = f" TYPE {index_type}" if index_type else ""
        granularity = self.sql(expression, "granularity")
        granularity = f" GRANULARITY {granularity}" if granularity else ""

        return f"INDEX{this}{expr}{index_type}{granularity}"

    def partition_sql(self, expression: exp.Partition) -> str:
        return f"PARTITION {self.expressions(expression, flat=True)}"

    def partitionid_sql(self, expression: exp.PartitionId) -> str:
        return f"ID {self.sql(expression.this)}"

    def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
        return (
            f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
        )

    def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
        return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

    def is_sql(self, expression: exp.Is) -> str:
        is_sql = super().is_sql(expression)

        if isinstance(expression.parent, exp.Not) and isinstance(expression.this, exp.Subquery):
            # WHERE (SELECT ...) IS NOT NULL -> NOT ((SELECT ...) IS NULL)
            is_sql = self.wrap(is_sql)

        return is_sql
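The overrides above are easiest to see through the SQL they produce. Below is a minimal sketch that exercises trycast_sql and regexpilike_sql through the public sqlglot.transpile API; the output strings shown in the comments are indicative and may differ slightly between sqlglot versions.

import sqlglot

# trycast_sql: TRY_CAST(x AS T) is rendered as a plain cast to Nullable(T),
# since casting to Nullable behaves like TRY_CAST in ClickHouse.
print(sqlglot.transpile("SELECT TRY_CAST(x AS TEXT)", write="clickhouse")[0])
# e.g. SELECT CAST(x AS Nullable(String))

# regexpilike_sql: a case-insensitive regex match (Postgres ~*) becomes
# match() with an inlined '(?i)' flag.
print(sqlglot.transpile("SELECT x ~* 'ab'", read="postgres", write="clickhouse")[0])
# e.g. SELECT match(x, CONCAT('(?i)', 'ab'))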
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): never quote, except in cases where it's mandatory by the dialect; True or 'always': always quote; 'safe': only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): convert names to uppercase; "lower": convert names to lowercase; False: disable function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
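These options are usually supplied through the top-level API rather than by instantiating a Generator directly: sqlglot.transpile forwards extra keyword arguments to the generator. A minimal sketch, using the pretty and identify options documented above:

import sqlglot

sql = sqlglot.transpile(
    "SELECT a, b FROM t WHERE a = 1",
    write="clickhouse",
    pretty=True,    # format the output across multiple lines
    identify=True,  # quote every identifier, not just the mandatory ones
)[0]
print(sql)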
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql