Clickhouse 配置

/etc/clickhouse-server/config.xml
/etc/clickhouse-server/users.xml
/etc/clickhouse-server/config.d
/etc/clickhouse-server/users.d
- XML, YAML
参考

# clickhouse-benchmark
# 会命令行启动 clickhouse server
curl -LO https://raw.githubusercontent.com/ClickHouse/ClickBench/main/hardware/hardware.sh
chmod a+x hardware.sh
./hardware.sh

# skip_check_for_incorrect_settings: 1 here.

# users 里可根据用户配置
# Server logging configuration.
# Logger implementation (Poco): https://github.com/pocoproject/poco/blob/poco-1.9.4-release/Foundation/include/Poco/Logger.h#L105-L114
logger:
  # Log level. Possible values:
  # none fatal critical error warning notice information debug trace
  level: trace
  # Paths of the main log file and of the separate error log file.
  log: /var/log/clickhouse-server/clickhouse-server.log
  errorlog: /var/log/clickhouse-server/clickhouse-server.err.log
  # Rotation policy
  # See https://github.com/pocoproject/poco/blob/poco-1.9.4-release/Foundation/include/Poco/FileChannel.h#L54-L85
  size: 1000M
  count: 10
  # console: 1 # auto-detected by default

  # Per level overrides (legacy):
  # For example to suppress logging of the ConfigReloader you can use:
  # NOTE: levels.logger is reserved, see below.
  # levels:
  #     ConfigReloader: none

  # Per level overrides:
  # For example to suppress logging of the RBAC for default user you can use:
  # (But please note that the logger name may change from version to version, even after a minor upgrade)
  # levels:
  #     - logger:
  #         name: 'ContextAccess (default)'
  #         level: none
  #     - logger:
  #         name: 'DatabaseOrdinary (test)'
  #         level: none

# It is the name that will be shown in the clickhouse-client.
# By default, anything with "production" will be highlighted in red in query prompt.
# display_name: production

# Port of the HTTP interface (the HTTPS variant is left commented out).
http_port: 8123
# https_port: 8443

# Port of the native protocol, used by:
# - clickhouse-client, clickhouse-benchmark, clickhouse-copier
# - clickhouse-server
# - native protocol drivers
tcp_port: 9000
# tcp_port_secure: 9440

# Ports of the MySQL and PostgreSQL wire-protocol interfaces.
mysql_port: 9004
postgresql_port: 9005

# Native protocol port wrapped in the PROXYv1 protocol.
# tcp_with_proxy_port: 9011

# Port for internal communication between replicas (data exchange).
# Keep it inaccessible to untrusted clients.
interserver_http_port: 9009
# interserver_https_port: 9010
# Defaults to the output of `hostname -f`.
# interserver_http_host: example.clickhouse.com
# Credentials for interserver communication; helps to avoid SSRF attacks.
# interserver_http_credentials:
#     user: interserver
#     password: ''

# Listen on all addresses (IPv6 / IPv4).
# listen_host: '::'
# listen_host: 0.0.0.0

# Listen on localhost only (IPv6 / IPv4).
# listen_host: '::1'
# listen_host: 127.0.0.1

# listen_try: 0
# Listening on the same port more than once - not recommended.
# listen_reuse_port: 0
# listen_backlog: 64
max_connections: 4096

# Timeout for HTTP 1.1 'Connection: keep-alive'.
keep_alive_timeout: 3

# gRPC interface settings.
# Protocol definition: src/Server/grpc_protos/clickhouse_grpc.proto
# grpc_port: 9100
grpc:
  enable_ssl: false

  # The following two files are used only if enable_ssl is turned on
  ssl_cert_file: /path/to/ssl_cert_file
  ssl_key_file: /path/to/ssl_key_file

  # Whether server will request client for a certificate
  ssl_require_client_auth: false

  # The following file is used only if ssl_require_client_auth is turned on
  ssl_ca_cert_file: /path/to/ssl_ca_cert_file

  # Default compression algorithm (applied if client doesn't specify another algorithm).
  # Supported algorithms: none, deflate, gzip, stream_gzip
  compression: deflate

  # Default compression level (applied if client doesn't specify another level).
  # Supported levels: none, low, medium, high
  compression_level: medium

  # Send/receive message size limits in bytes. -1 means unlimited
  max_send_message_size: -1
  max_receive_message_size: -1

  # Enable if you want very detailed logs
  verbose_logs: false

# TLS settings for the server side and for outgoing (client) connections.
# All options: https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h#L71
openSSL:
  server:
    # Used for https server AND secure tcp port
    # A self-signed certificate can be generated with:
    # openssl req -subj "/CN=localhost" -new -newkey rsa:2048 -days 365 -nodes -x509 -keyout /etc/clickhouse-server/server.key -out /etc/clickhouse-server/server.crt
    # certificateFile: /etc/clickhouse-server/server.crt
    # privateKeyFile: /etc/clickhouse-server/server.key

    # dhparams are optional. You can delete the dhParamsFile: element.
    # To generate dhparams, use the following command:
    # openssl dhparam -out /etc/clickhouse-server/dhparam.pem 4096
    # Only file format with BEGIN DH PARAMETERS is supported.
    dhParamsFile: /etc/clickhouse-server/dhparam.pem
    verificationMode: none
    loadDefaultCAFile: true
    cacheSessions: true
    disableProtocols: 'sslv2,sslv3'
    preferServerCiphers: true
  client:
    # Used for connecting to https dictionary source and secured Zookeeper communication
    loadDefaultCAFile: true
    cacheSessions: true
    disableProtocols: 'sslv2,sslv3'
    preferServerCiphers: true

    # For self-signed certificates add: verificationMode: none
    invalidCertificateHandler:
      # For self-signed certificates use: name: AcceptCertificateHandler
      name: RejectCertificateHandler

# https://github.com/tabixio/tabix
# 请求 http://localhost:8123 时的默认响应
# http_server_default_response: |-
#     <html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>

# Maximum number of concurrent queries.
max_concurrent_queries: 100

# Maximum memory usage (resident set size) for server process.
# Zero value or unset means default. Default is "max_server_memory_usage_to_ram_ratio" of available physical RAM.
# If the value is larger than "max_server_memory_usage_to_ram_ratio" of available physical RAM, it will be cut down.

# The constraint is checked on query execution time.
# If a query tries to allocate memory and the current memory usage plus allocation is greater
# than specified threshold, exception will be thrown.

# It is not practical to set this constraint to small values like just a few gigabytes,
# because memory allocator will keep this amount of memory in caches and the server will deny service of queries.
max_server_memory_usage: 0

# Maximum number of threads in the Global thread pool.
# This will default to a maximum of 10000 threads if not specified.
# This setting will be useful in scenarios where there are a large number
# of distributed queries that are running concurrently but are idling most
# of the time, in which case a higher number of threads might be required.
max_thread_pool_size: 10000

# Fraction of available physical RAM used as the default / upper bound for max_server_memory_usage.
# On memory constrained environments you may have to set this to value larger than 1.
max_server_memory_usage_to_ram_ratio: 0.9

# Simple server-wide memory profiler. Collect a stack trace at every peak allocation step (in bytes).
# Data will be stored in system.trace_log table with query_id = empty string.
# Zero means disabled.
# 4194304 bytes = 4 MiB.
total_memory_profiler_step: 4194304

# Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type.
# The probability is for every alloc/free regardless of the size of the allocation.
# Note that sampling happens only when the amount of untracked memory exceeds the untracked memory limit,
# which is 4 MiB by default but can be lowered if 'total_memory_profiler_step' is lowered.
# You may want to set 'total_memory_profiler_step' to 1 for extra fine grained sampling.
total_memory_tracker_sample_probability: 0

# Set limit on number of open files (default: maximum). This setting makes sense on Mac OS X because getrlimit() fails to retrieve
# correct maximum value.
# max_open_files: 262144

# Size of cache of uncompressed blocks of data, used in tables of MergeTree family.
# In bytes. Cache is single for server. Memory is allocated only on demand.
# Cache is used when 'use_uncompressed_cache' user setting turned on (off by default).
# Uncompressed cache is advantageous only for very short queries and in rare cases.

# Note: uncompressed cache can be pointless for lz4, because memory bandwidth
# is slower than multi-core decompression on some server configurations.
# Enabling it can sometimes paradoxically make queries slower.
# 8589934592 bytes = 8 GiB.
uncompressed_cache_size: 8589934592

# Approximate size of mark cache, used in tables of MergeTree family.
# In bytes. Cache is single for server. Memory is allocated only on demand.
# You should not lower this value.
# 5368709120 bytes = 5 GiB.
mark_cache_size: 5368709120

# If you enable the `min_bytes_to_use_mmap_io` setting,
# the data in MergeTree tables can be read with mmap to avoid copying from kernel to userspace.
# It makes sense only for large files and helps only if data reside in page cache.
# To avoid frequent open/mmap/munmap/close calls (which are very expensive due to consequent page faults)
# and to reuse mappings from several threads and queries,
# the cache of mapped files is maintained. Its size is the number of mapped regions (usually equal to the number of mapped files).
# The amount of data in mapped files can be monitored
# in system.metrics, system.metric_log by the MMappedFiles, MMappedFileBytes metrics
# and in system.asynchronous_metrics, system.asynchronous_metrics_log by the MMapCacheCells metric,
# and also in system.events, system.processes, system.query_log, system.query_thread_log, system.query_views_log by the
# CreatedReadBufferMMap, CreatedReadBufferMMapFailed, MMappedFileCacheHits, MMappedFileCacheMisses events.
# Note that the amount of data in mapped files does not consume memory directly and is not accounted
# in query or server memory usage - because this memory can be discarded similar to OS page cache.
# The cache is dropped (the files are closed) automatically on removal of old parts in MergeTree,
# also it can be dropped manually by the SYSTEM DROP MMAP CACHE query.
mmap_cache_size: 1000

# Cache size in bytes for compiled expressions.
# 134217728 bytes = 128 MiB.
compiled_expression_cache_size: 134217728

# Cache size in elements for compiled expressions.
compiled_expression_cache_elements_size: 10000

# Data directory.
path: /var/lib/clickhouse/
# Directory for temporary data.
tmp_path: /var/lib/clickhouse/tmp/

# Policy from the <storage_configuration> for the temporary files.
# If not set <tmp_path> is used, otherwise <tmp_path> is ignored.

# Notes:
# - move_factor              is ignored
# - keep_free_space_bytes    is ignored
# - max_data_part_size_bytes is ignored
# - you must have exactly one volume in that policy
# tmp_policy: tmp

# Directory with user files - used by the file function, SQLite access, etc.
user_files_path: /var/lib/clickhouse/user_files/

# LDAP server definitions (empty here: no LDAP servers are configured).
ldap_servers: ''

# List LDAP servers with their connection parameters here to later 1) use them as authenticators for dedicated local users,
# who have 'ldap' authentication mechanism specified instead of 'password', or to 2) use them as remote user directories.
# Parameters:
# host - LDAP server hostname or IP, this parameter is mandatory and cannot be empty.
# port - LDAP server port, default is 636 if enable_tls is set to true, 389 otherwise.
# bind_dn - template used to construct the DN to bind to.
# The resulting DN will be constructed by replacing all '{user_name}' substrings of the template with the actual
# user name during each authentication attempt.
# user_dn_detection - section with LDAP search parameters for detecting the actual user DN of the bound user.
# This is mainly used in search filters for further role mapping when the server is Active Directory. The
# resulting user DN will be used when replacing '{user_dn}' substrings wherever they are allowed. By default,
# user DN is set equal to bind DN, but once search is performed, it will be updated to the actual detected
# user DN value.
# base_dn - template used to construct the base DN for the LDAP search.
# The resulting DN will be constructed by replacing all '{user_name}' and '{bind_dn}' substrings
# of the template with the actual user name and bind DN during the LDAP search.
# scope - scope of the LDAP search.
# Accepted values are: 'base', 'one_level', 'children', 'subtree' (the default).
# search_filter - template used to construct the search filter for the LDAP search.
# The resulting filter will be constructed by replacing all '{user_name}', '{bind_dn}', and '{base_dn}'
# substrings of the template with the actual user name, bind DN, and base DN during the LDAP search.
# Note, that the special characters must be escaped properly in XML.
# verification_cooldown - a period of time, in seconds, after a successful bind attempt, during which a user will be assumed
# to be successfully authenticated for all consecutive requests without contacting the LDAP server.
# Specify 0 (the default) to disable caching and force contacting the LDAP server for each authentication request.
# enable_tls - flag to trigger use of secure connection to the LDAP server.
# Specify 'no' for plain text (ldap://) protocol (not recommended).
# Specify 'yes' for LDAP over SSL/TLS (ldaps://) protocol (recommended, the default).
# Specify 'starttls' for legacy StartTLS protocol (plain text (ldap://) protocol, upgraded to TLS).
# tls_minimum_protocol_version - the minimum protocol version of SSL/TLS.
# Accepted values are: 'ssl2', 'ssl3', 'tls1.0', 'tls1.1', 'tls1.2' (the default).
# tls_require_cert - SSL/TLS peer certificate verification behavior.
# Accepted values are: 'never', 'allow', 'try', 'demand' (the default).
# tls_cert_file - path to certificate file.
# tls_key_file - path to certificate key file.
# tls_ca_cert_file - path to CA certificate file.
# tls_ca_cert_dir - path to the directory containing CA certificates.
# tls_cipher_suite - allowed cipher suite (in OpenSSL notation).
# Example:
# my_ldap_server:
#     host: localhost
#     port: 636
#     bind_dn: 'uid={user_name},ou=users,dc=example,dc=com'
#     verification_cooldown: 300
#     enable_tls: yes
#     tls_minimum_protocol_version: tls1.2
#     tls_require_cert: demand
#     tls_cert_file: /path/to/tls_cert_file
#     tls_key_file: /path/to/tls_key_file
#     tls_ca_cert_file: /path/to/tls_ca_cert_file
#     tls_ca_cert_dir: /path/to/tls_ca_cert_dir
#     tls_cipher_suite: ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:AES256-GCM-SHA384

# Example (typical Active Directory with configured user DN detection for further role mapping):
# my_ad_server:
#     host: localhost
#     port: 389
#     bind_dn: 'EXAMPLE\{user_name}'
#     user_dn_detection:
#         base_dn: CN=Users,DC=example,DC=com
#         search_filter: '(&(objectClass=user)(sAMAccountName={user_name}))'
#     enable_tls: no

# To enable Kerberos authentication support for HTTP requests (GSS-SPNEGO), for those users who are explicitly configured
# to authenticate via Kerberos, define a single 'kerberos' section here.
# Parameters:
# principal - canonical service principal name, that will be acquired and used when accepting security contexts.
# This parameter is optional, if omitted, the default principal will be used.
# This parameter cannot be specified together with 'realm' parameter.
# realm - a realm, that will be used to restrict authentication to only those requests whose initiator's realm matches it.
# This parameter is optional, if omitted, no additional filtering by realm will be applied.
# This parameter cannot be specified together with 'principal' parameter.
# Example:
# kerberos: ''

# Example:
# kerberos:
#     principal: HTTP/clickhouse.example.com@EXAMPLE.COM

# Example:
# kerberos:
#     realm: EXAMPLE.COM

# Sources to read users, roles, access rights, profiles of settings, quotas.
user_directories:
  # Predefined users from a configuration file.
  # The section is named users_xml, but here it points at a YAML file.
  users_xml:
    # Path to configuration file with predefined users.
    path: users.yaml
  # Users managed through SQL.
  local_directory:
    # Path to folder where users created by SQL commands are stored.
    path: /var/lib/clickhouse/access/

#   # To add an LDAP server as a remote user directory of users that are not defined locally, define a single 'ldap' section
#   # with the following parameters:
#   # server - one of LDAP server names defined in 'ldap_servers' config section above.
#   # This parameter is mandatory and cannot be empty.
#   # roles - section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server.
#   # If no roles are specified here or assigned during role mapping (below), user will not be able to perform any
#   # actions after authentication.
#   # role_mapping - section with LDAP search parameters and mapping rules.
#   # When a user authenticates, while still bound to LDAP, an LDAP search is performed using search_filter and the
#   # name of the logged in user. For each entry found during that search, the value of the specified attribute is
#   # extracted. For each attribute value that has the specified prefix, the prefix is removed, and the rest of the
#   # value becomes the name of a local role defined in ClickHouse, which is expected to be created beforehand by
#   # CREATE ROLE command.
#   # There can be multiple 'role_mapping' sections defined inside the same 'ldap' section. All of them will be
#   # applied.
#   # base_dn - template used to construct the base DN for the LDAP search.
#   # The resulting DN will be constructed by replacing all '{user_name}', '{bind_dn}', and '{user_dn}'
#   # substrings of the template with the actual user name, bind DN, and user DN during each LDAP search.
#   # scope - scope of the LDAP search.
#   # Accepted values are: 'base', 'one_level', 'children', 'subtree' (the default).
#   # search_filter - template used to construct the search filter for the LDAP search.
#   # The resulting filter will be constructed by replacing all '{user_name}', '{bind_dn}', '{user_dn}', and
#   # '{base_dn}' substrings of the template with the actual user name, bind DN, user DN, and base DN during
#   # each LDAP search.
#   # Note, that the special characters must be escaped properly in XML.
#   # attribute - attribute name whose values will be returned by the LDAP search. 'cn', by default.
#   # prefix - prefix, that will be expected to be in front of each string in the original list of strings returned by
#   # the LDAP search. Prefix will be removed from the original strings and resulting strings will be treated
#   # as local role names. Empty, by default.
#   # Example:
#   # ldap:
#   #     server: my_ldap_server
#   #     roles:
#   #         my_local_role1: ''
#   #         my_local_role2: ''
#   #     role_mapping:
#   #         base_dn: 'ou=groups,dc=example,dc=com'
#   #         scope: subtree
#   #         search_filter: '(&(objectClass=groupOfNames)(member={bind_dn}))'
#   #         attribute: cn
#   #         prefix: clickhouse_
#   # Example (typical Active Directory with role mapping that relies on the detected user DN):
#   # ldap:
#   #     server: my_ad_server
#   #     role_mapping:
#   #         base_dn: 'CN=Users,DC=example,DC=com'
#   #         attribute: CN
#   #         scope: subtree
#   #         search_filter: '(&(objectClass=group)(member={user_dn}))'
#   #         prefix: clickhouse_

# Default settings profile.
default_profile: default

# Comma-separated list of prefixes for user-defined settings.
# custom_settings_prefixes: ''
# system_profile: default
# buffer_profile: default

# Default database.
default_database: default

# Server time zone could be set here.

# Time zone is used when converting between String and DateTime types,
# when printing DateTime in text formats and parsing DateTime from text,
# it is used in date and time related functions, if specific time zone was not passed as an argument.

# Time zone is specified as identifier from IANA time zone database, like UTC or Africa/Abidjan.
# If not specified, system time zone at server startup is used.

# Please note, that server could display time zone alias instead of specified name.
# Example: Zulu is an alias for UTC.
# timezone: UTC

# You can specify umask here (see "man umask"). Server will apply it on startup.
# Number is always parsed as octal. Default umask is 027 (other users cannot read logs, data files, etc; group can only read).
# umask: 022

# Perform mlockall after startup to lower first queries latency
# and to prevent clickhouse executable from being paged out under high IO load.
# Enabling this option is recommended but will lead to increased startup time for up to a few seconds.
mlock_executable: true

# Reallocate memory for machine code ("text") using huge pages. Highly experimental.
remap_executable: false

# Uncomment below in order to use JDBC table engine and function.
# To install and run JDBC bridge in background:
# * [Debian/Ubuntu]
# export MVN_URL=https://repo1.maven.org/maven2/ru/yandex/clickhouse/clickhouse-jdbc-bridge
# export PKG_VER=$(curl -sL $MVN_URL/maven-metadata.xml | grep '<release>' | sed -e 's|.*>\(.*\)<.*|\1|')
# wget https://github.com/ClickHouse/clickhouse-jdbc-bridge/releases/download/v$PKG_VER/clickhouse-jdbc-bridge_$PKG_VER-1_all.deb
# apt install --no-install-recommends -f ./clickhouse-jdbc-bridge_$PKG_VER-1_all.deb
# clickhouse-jdbc-bridge &
# * [CentOS/RHEL]
# export MVN_URL=https://repo1.maven.org/maven2/ru/yandex/clickhouse/clickhouse-jdbc-bridge
# export PKG_VER=$(curl -sL $MVN_URL/maven-metadata.xml | grep '<release>' | sed -e 's|.*>\(.*\)<.*|\1|')
# wget https://github.com/ClickHouse/clickhouse-jdbc-bridge/releases/download/v$PKG_VER/clickhouse-jdbc-bridge-$PKG_VER-1.noarch.rpm
# yum localinstall -y clickhouse-jdbc-bridge-$PKG_VER-1.noarch.rpm
# clickhouse-jdbc-bridge &
# Please refer to https://github.com/ClickHouse/clickhouse-jdbc-bridge#usage for more information.

# jdbc_bridge:
#     host: 127.0.0.1
#     port: 9019

# Configuration of clusters that could be used in Distributed tables.
# https://clickhouse.com/docs/en/operations/table_engines/distributed/
# Each key below is a cluster name; a cluster consists of one or more shards,
# and each shard has one or more replicas.
remote_servers:
  # Test only shard config for testing distributed storage
  test_shard_localhost:
    # Inter-server per-cluster secret for Distributed queries
    # default: no secret (no authentication will be performed)

    # If set, then Distributed queries will be validated on shards, so at least:
    # - such cluster should exist on the shard,
    # - such cluster should have the same secret.

    # And also (and which is more important), the initial_user will
    # be used as current user for the query.

    # Right now the protocol is pretty simple and it only takes into account:
    # - cluster name
    # - query

    # Also it will be nice if the following will be implemented:
    # - source hostname (see interserver_http_host), but then it will depend on DNS;
    # an IP address could be used instead, but then you need to get the correct one on the initiator node.
    # - target hostname / ip address (same notes as for source hostname)
    # - time-based security tokens
    # secret: ''
    shard:
      # Optional. Whether to write data to just one of the replicas. Default: false (write data to all replicas).
      # internal_replication: false
      # Optional. Shard weight when writing data. Default: 1.
      # weight: 1
      replica:
        host: localhost
        port: 9000
        # Optional. Priority of the replica for load_balancing. Default: 1 (less value has more priority).
        # priority: 1
  # Two shards, each with a single replica on localhost:9000.
  test_cluster_two_shards_localhost:
    shard:
      - replica:
          host: localhost
          port: 9000
      - replica:
          host: localhost
          port: 9000
  # Two shards with one replica each, on 127.0.0.1 and 127.0.0.2.
  test_cluster_two_shards:
    shard:
      - replica:
          host: 127.0.0.1
          port: 9000
      - replica:
          host: 127.0.0.2
          port: 9000
  # Same two hosts as test_cluster_two_shards, with internal_replication enabled on both shards.
  test_cluster_two_shards_internal_replication:
    shard:
      - internal_replication: true
        replica:
          host: 127.0.0.1
          port: 9000
      - internal_replication: true
        replica:
          host: 127.0.0.2
          port: 9000
  # Single shard on localhost reached over port 9440 with secure: 1.
  test_shard_localhost_secure:
    shard:
      replica:
        host: localhost
        port: 9440
        secure: 1
  # Second shard points at port 1 - presumably nothing listens there, which makes that shard unavailable.
  test_unavailable_shard:
    shard:
      - replica:
          host: localhost
          port: 9000
      - replica:
          host: localhost
          port: 1

# The list of hosts allowed to use in URL-related storage engines and table functions.
# If this section is not present in configuration, all hosts are allowed.
# remote_url_allow_hosts:

# Host should be specified exactly as in URL. The name is checked before DNS resolution.
# Example: "clickhouse.com", "clickhouse.com." and "www.clickhouse.com" are different hosts.
# If port is explicitly specified in URL, the host:port is checked as a whole.
# If host specified here without port, any port with this host allowed.
# "clickhouse.com" -> "clickhouse.com:443", "clickhouse.com:80" etc. is allowed, but "clickhouse.com:80" -> only "clickhouse.com:80" is allowed.
# If the host is specified as IP address, it is checked as specified in URL. Example: "[2a02:6b8:a::a]".
# If there are redirects and support for redirects is enabled, every redirect (the Location field) is checked.

# Regular expression can be specified. RE2 engine is used for regexps.
# Regexps are not aligned: don't forget to add ^ and $. Also don't forget to escape dot (.) metacharacter
# (forgetting to do so is a common source of error).

# If element has 'incl' attribute, then for its value the corresponding substitution from another file will be used.
# By default, path to file with substitutions is /etc/metrika.xml. It could be changed in config in 'include_from' element.
# Values for substitutions are specified in /clickhouse/name_of_substitution elements in that file.

# ZooKeeper is used to store metadata about replicas, when using Replicated tables.
# Optional. If you don't use replicated tables, you could omit that.
# See https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication/

# zookeeper:
#     - node:
#         host: example1
#         port: 2181
#     - node:
#         host: example2
#         port: 2181
#     - node:
#         host: example3
#         port: 2181

# Substitutions for parameters of replicated tables.
# Optional. If you don't use replicated tables, you could omit that.
# See https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication/#creating-replicated-tables
# macros:
#     shard: 01
#     replica: example01-01-1

# Reloading interval for embedded dictionaries, in seconds. Default: 3600.
# 3600 seconds = 1 hour.
builtin_dictionaries_reload_interval: 3600

# Maximum session timeout, in seconds. Default: 3600.
# 3600 seconds = 1 hour.
max_session_timeout: 3600

# Default session timeout, in seconds. Default: 60.
# 60 seconds = 1 minute.
default_session_timeout: 60

# Sending data to Graphite for monitoring. Several sections can be defined.
# interval - send every X second
# root_path - prefix for keys
# hostname_in_path - append hostname to root_path (default = true)
# metrics - send data from table system.metrics
# events - send data from table system.events
# asynchronous_metrics - send data from table system.asynchronous_metrics

# graphite:
#     host: localhost
#     port: 42000
#     timeout: 0.1
#     interval: 60
#     root_path: one_min
#     hostname_in_path: true

#     metrics: true
#     events: true
#     events_cumulative: false
#     asynchronous_metrics: true

# graphite:
#     host: localhost
#     port: 42000
#     timeout: 0.1
#     interval: 1
#     root_path: one_sec

#     metrics: true
#     events: true
#     events_cumulative: false
#     asynchronous_metrics: false

# Serve endpoint for Prometheus monitoring.
# endpoint - metrics path (relative to root, starting with "/")
# port - port to set up the server on. If not defined or 0 then http_port is used
# metrics - send data from table system.metrics
# events - send data from table system.events
# asynchronous_metrics - send data from table system.asynchronous_metrics
# status_info - send data from different components of ClickHouse, e.g. Dictionaries status

prometheus:
  endpoint: /metrics
  port: 9363

  metrics: true
  events: true
  asynchronous_metrics: true
  status_info: true

# Query log configuration - requires log_queries = 1.
query_log:
  # Database and table the log is written to.
  database: system
  table: query_log

  # PARTITION BY expr: https://clickhouse.com/docs/en/table_engines/mergetree-family/custom_partitioning_key/
  # Example:
  # event_date
  # toMonday(event_date)
  # toYYYYMM(event_date)
  # toStartOfHour(event_time)
  partition_by: toYYYYMM(event_date)

  # Table TTL specification: https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree/#mergetree-table-ttl
  # Example:
  # event_date + INTERVAL 1 WEEK
  # event_date + INTERVAL 7 DAY DELETE
  # event_date + INTERVAL 2 WEEK TO DISK 'bbb'

  # ttl: 'event_date + INTERVAL 30 DAY DELETE'

  # Instead of partition_by, you can provide full engine expression (starting with ENGINE = ) with parameters,
  # Example: engine: 'ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024'

  # Interval of flushing data, in milliseconds.
  flush_interval_milliseconds: 7500

# Trace log. Stores stack traces collected by query profilers.
# See query_profiler_real_time_period_ns and query_profiler_cpu_time_period_ns settings.
trace_log:
  database: system
  table: trace_log
  partition_by: toYYYYMM(event_date)
  # Interval of flushing data, in milliseconds.
  flush_interval_milliseconds: 7500

# Query thread log. Has information about all threads participated in query execution.
# Used only for queries with setting log_query_threads = 1.
query_thread_log:
  database: system
  table: query_thread_log
  partition_by: toYYYYMM(event_date)
  # Interval of flushing data, in milliseconds.
  flush_interval_milliseconds: 7500

# Query views log. Has information about all dependent views associated with a query.
# Used only for queries with setting log_query_views = 1.
query_views_log:
  database: system
  table: query_views_log
  partition_by: toYYYYMM(event_date)
  # Interval of flushing data, in milliseconds.
  flush_interval_milliseconds: 7500

# Part log (enabled here; comment this section out if the part log is not needed).
# Part log contains information about all actions with parts in MergeTree tables (creation, deletion, merges, downloads).
part_log:
  database: system
  table: part_log
  partition_by: toYYYYMM(event_date)
  # Interval of flushing data, in milliseconds.
  flush_interval_milliseconds: 7500

# Uncomment to write text log into table.
# Text log contains all information from usual server log but stores it in structured and efficient way.
# The level of the messages that goes to the table can be limited (<level>), if not specified all messages will go to the table.
# text_log:
#     database: system
#     table: text_log
#     flush_interval_milliseconds: 7500
#     level: ''

# Metric log contains rows with current values of ProfileEvents, CurrentMetrics collected with "collect_interval_milliseconds" interval.
metric_log:
  database: system
  table: metric_log
  # Interval of flushing data, in milliseconds.
  flush_interval_milliseconds: 7500
  # Values are collected every 1000 ms (once a second).
  collect_interval_milliseconds: 1000

# Asynchronous metric log contains values of metrics from
# system.asynchronous_metrics.
asynchronous_metric_log:
  database: system
  table: asynchronous_metric_log

  # Asynchronous metrics are updated once a minute, so there is
  # no need to flush more often (60000 ms = 1 minute).
  flush_interval_milliseconds: 60000

# OpenTelemetry log contains OpenTelemetry trace spans.
opentelemetry_span_log:
  # The default table creation code is insufficient, so this explicit engine
  # specification is a workaround. There is no 'event_time' for this log, but two times,
  # start and finish. It is sorted by finish time, to avoid inserting
  # data too far away in the past (probably we can sometimes insert a span
  # that is seconds earlier than the last span in the table, due to a race
  # between several spans inserted in parallel). This gives the spans a
  # global order that we can use to e.g. retry insertion into some external
  # system.
  # The value is a multi-line literal block (|-): line breaks are kept and the
  # trailing newline is stripped.
  engine: |-
    engine MergeTree
         partition by toYYYYMM(finish_date)
         order by (finish_date, finish_time_us, trace_id)
  database: system
  table: opentelemetry_span_log
  # Interval of flushing data, in milliseconds.
  flush_interval_milliseconds: 7500

# Crash log. Stores stack traces for fatal errors.
# This table is normally empty.
crash_log:
  database: system
  table: crash_log
  # Empty partitioning expression.
  partition_by: ''
  # Flushed every 1000 ms (once a second), more often than the other logs in this file.
  flush_interval_milliseconds: 1000

# top_level_domains_path: /var/lib/clickhouse/top_level_domains/
# Custom TLD lists.
# Format: name: /path/to/file

# Changes will not be applied w/o server restart.
# Path to the list is under top_level_domains_path (see above).
# Empty here: no custom TLD lists are configured.
top_level_domains_lists: ''

# public_suffix_list: /path/to/public_suffix_list.dat

# Configuration of external dictionaries. See:
# https://clickhouse.com/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts
# The value is a file name pattern; it is quoted because it starts with '*',
# which YAML would otherwise read as an alias.
dictionaries_config: '*_dictionary.xml'

# Uncomment if you want data to be compressed 30-100% better.
# Don't do that if you just started using ClickHouse.

# compression:
#     # Set of variants. Checked in order. Last matching case wins. If nothing matches, lz4 will be used.
#     case:
#         # Conditions. All must be satisfied. Some conditions may be omitted.
#         # min_part_size: 10000000000    # Min part size in bytes.
#         # min_part_size_ratio: 0.01     # Min size of part relative to whole table size.
#         # What compression method to use.
#         method: zstd

# Allows executing distributed DDL queries (CREATE, DROP, ALTER, RENAME) on a cluster.
# Works only if ZooKeeper is enabled. Comment this section out if the functionality isn't required.
distributed_ddl:
  # Path in ZooKeeper to the queue of DDL queries.
  path: /clickhouse/task_queue/ddl

  # Settings from this profile will be used to execute DDL queries.
  # profile: default

  # Controls how many ON CLUSTER queries can run simultaneously.
  # pool_size: 1

  # Cleanup settings (active tasks will not be removed).

  # Task TTL in seconds (default 604800 = 1 week).
  # task_max_lifetime: 604800

  # How often cleanup should be performed, in seconds.
  # cleanup_delay_period: 60

  # Maximum number of tasks that may be in the queue.
  # max_tasks_in_queue: 1000

# Settings to fine tune MergeTree tables. See documentation in source code, in MergeTreeSettings.h
# merge_tree:
#     max_suspicious_broken_parts: 5

# Protection from accidental DROP.
# If the size of a MergeTree table is greater than max_table_size_to_drop (in bytes), then the table cannot be dropped with any DROP query.
# If you want to delete one table and don't want to change the clickhouse-server config, you can create the special file <clickhouse-path>/flags/force_drop_table and run DROP once.
# By default max_table_size_to_drop is 50GB; max_table_size_to_drop=0 allows to DROP any tables.
# The same for max_partition_size_to_drop.
# Uncomment to disable protection.

# max_table_size_to_drop: 0
# max_partition_size_to_drop: 0

# Example of parameters for the GraphiteMergeTree table engine.
# NOTE(review): the per-key notes below assume the usual GraphiteMergeTree
# rollup semantics (age and precision expressed in seconds) — confirm against
# the engine documentation.
graphite_rollup_example:
  # Rule for metrics whose name matches `regexp`, aggregated with `function`.
  pattern:
    regexp: click_cost
    function: any
    # Retention steps: from age 0 use precision 3600; from age 86400 use 60.
    retention:
      - age: 0
        precision: 3600
      - age: 86400
        precision: 60
  # Rule with no regexp; presumably the fallback for unmatched metrics.
  default:
    function: max
    # Retention steps: precision 60 from age 0, 300 from 3600, 3600 from 86400.
    retention:
      - age: 0
        precision: 60
      - age: 3600
        precision: 300
      - age: 86400
        precision: 3600

# Directory holding format schema files.
# NOTE(review): presumably the schemas used by schema-based input/output formats — confirm.
format_schema_path: /var/lib/clickhouse/format_schemas/

# Default query masking rules: matching lines are replaced with something else
# in the logs (both text logs and system.query_log).
#   name    - name for the rule (optional)
#   regexp  - RE2-compatible regular expression (mandatory)
#   replace - substitution string for sensitive data (optional; six asterisks by default)
query_masking_rules:
  rule:
    name: hide encrypt/decrypt arguments
    # Single-quoted YAML scalar: every '' below stands for one literal ' in the
    # regexp, and backslashes are taken literally.
    regexp: '((?:aes_)?(?:encrypt|decrypt)(?:_mysql)?)\s*\(\s*(?:''(?:\\''|.)+''|.*?)\s*\)'
    # or more secure, but also more invasive:
    # (aes_\w+)\s*\(.*\)
    # \1 re-inserts the first capture group (the function name); the
    # parenthesised arguments are replaced by "???".
    replace: \1(???)

# Uncomment to use custom http handlers.
# rules are checked from top to bottom, first match runs the handler
# url - to match request URL, you can use 'regex:' prefix to use regex match(optional)
# methods - to match request method, you can use commas to separate multiple method matches(optional)
# headers - to match request headers, match each child element(child element name is header name), you can use 'regex:' prefix to use regex match(optional)
# handler is request handler
# type - supported types: static, dynamic_query_handler, predefined_query_handler
# query - use with predefined_query_handler type, executes query when the handler is called
# query_param_name - use with dynamic_query_handler type, extracts and executes the value corresponding to the <query_param_name> value in HTTP request params
# status - use with static type, response status code
# content_type - use with static type, response content-type
# response_content - use with static type, Response content sent to client, when using the prefix 'file://' or 'config://', find the content from the file or configuration send to client.

# http_handlers:
#     - rule:
#         url: /
#         methods: POST,GET
#         headers:
#           pragma: no-cache
#         handler:
#           type: dynamic_query_handler
#           query_param_name: query
#     - rule:
#         url: /predefined_query
#         methods: POST,GET
#         handler:
#           type: predefined_query_handler
#           query: 'SELECT * FROM system.settings'
#     - rule:
#         handler:
#           type: static
#           status: 200
#           content_type: 'text/plain; charset=UTF-8'
#           response_content: config://http_server_default_response

# Crash reporting to a Sentry endpoint. Disabled here (`enabled: false`), so
# the endpoint below is only used once reporting is switched on.
send_crash_reports:
  enabled: false
  anonymize: false
  # Sentry DSN in the form https://<public key>@<host>/<project id>.
  # This is the default value shipped in ClickHouse's example configuration.
  endpoint: 'https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277'

# Uncomment to disable ClickHouse internal DNS caching.
# This is a top-level server setting, not a child of send_crash_reports.
# disable_internal_dns_cache: 1

users.yaml

# Generate a random UUID and its SHA-256 digest.
# `tee -a /dev/fd/2` echoes the UUID to stderr so it can be noted down,
# `tr -d '\r\n'` strips the line ending, and `sha256sum` prints the hex digest.
uuidgen | tee -a /dev/fd/2 | tr -d '\r\n' | sha256sum

# Settings profiles. Users refer to a profile by name through their `profile` key.
profiles:
  # Profile named `default`.
  default:
    # Memory limit: 10 GB, written out as 10^10 bytes.
    max_memory_usage: 10000000000

    # How to choose between replicas during distributed query processing:
    #   random           - choose a random replica from the set of replicas with
    #                      the minimum number of errors.
    #   nearest_hostname - from the set of replicas with the minimum number of
    #                      errors, choose the replica whose hostname differs from
    #                      the local hostname in the fewest symbols (Hamming distance).
    #   in_order         - the first live replica is chosen, in the specified order.
    #   first_or_random  - if the first replica has a higher number of errors, pick
    #                      a random one from the replicas with the minimum number of errors.
    load_balancing: random

  # Profile named `readonly`; it sets only the `readonly` setting, to 1.
  readonly:
    readonly: 1

users:
  # If no username is given, the user name is `default`.
  default:
    # Authentication: configure exactly ONE method per user. Only the plain
    # `password` below is active (empty string = no password); the
    # alternatives are kept as commented-out examples, because enabling
    # several methods at once is rejected as ambiguous authentication info.
    password: ''

    # SHA-256 hash of the password, hex-encoded. Generate one with:
    # PASSWORD=$(base64 < /dev/urandom | head -c32); echo "$PASSWORD"; echo -n "$PASSWORD" | sha256sum | tr -d '-'
    # password_sha256_hex: <hex digest>

    # Double SHA-1 hash of the password, hex-encoded. Generate one with:
    # PASSWORD=$(base64 < /dev/urandom | head -c32); echo "$PASSWORD"; echo -n "$PASSWORD" | sha1sum | tr -d '-' | xxd -r -p | sha1sum | tr -d '-'
    # password_double_sha1_hex: <hex digest>

    # LDAP
    # ldap:
    #   server: my_ldap_server

    # Kerberos
    # kerberos:
    #   realm: EXAMPLE.COM

    # List of networks with open access.
    #
    # To open access from everywhere, specify:
    #    ip: '::/0'
    #
    # To open access only from localhost, specify:
    #    ip:
    #      - '::1'
    #      - '127.0.0.1'
    #
    # Each entry has one of the following forms:
    # ip: IP address or network mask. Examples: 213.180.204.3 or 10.0.0.1/8 or 10.0.0.1/255.255.255.0
    # 2a02:6b8::3 or 2a02:6b8::3/64 or 2a02:6b8::3/ffff:ffff:ffff:ffff::.
    # host: Hostname. Example: server01.clickhouse.com.
    # To check access, a DNS query is performed, and all received addresses are compared to the peer address.
    # host_regexp: Regular expression for host names. Example: ^server\d\d-\d\d-\d\.clickhouse\.com$
    # To check access, a DNS PTR query is performed for the peer address and then the regexp is applied.
    # Then, for the result of the PTR query, another DNS query is performed and all received addresses are compared to the peer address.
    # It is strongly recommended that the regexp ends with $ and that the whole expression is wrapped in ''.
    # All results of DNS requests are cached until server restart.

    networks:
      ip: '::/0'

    # Settings profile for user.
    profile: default

    # Quota for user.
    quota: default

    # User can create other users and grant rights to them.
    # access_management: 1

# Quotas. Users refer to a quota by name through their `quota` key.
quotas:
  # Name of the quota.
  default:
    # Limits for a time interval. Several intervals with different limits may be specified.
    interval:
      # Length of the interval.
      # NOTE(review): 3600 is presumably in seconds (one hour) — confirm.
      duration: 3600

      # 0 = no limit: resource usage is only calculated for the interval, not restricted.
      queries: 0
      errors: 0
      result_rows: 0
      read_rows: 0
      execution_time: 0

REST API

# Send the query as the request body; `--data-binary @-` reads it from stdin.
echo 'SELECT version()' | curl 'http://localhost:8123/' --data-binary @-

# Pass the query in the `query` URL parameter, percent-encoded by hand.
curl 'http://192.168.66.61:8123?query=select%20version()'
# Same request, letting curl do the encoding: `--get` moves the
# `--data-urlencode` payload into the query string.
curl --get http://192.168.66.61:8123 --data-urlencode 'query=select version()'

Query

-- Server version.
select version();

-- Current session context: database, settings profiles, user, and roles.
select currentDatabase();
select currentProfiles();
select currentUser();
select currentRoles();

docker_related_config.xml

<clickhouse>
     <!-- Listen on the wildcard addresses (IPv6 "::" and IPv4 "0.0.0.0") to allow accepting connections from other containers and the host network. -->
    <listen_host>::</listen_host>
    <listen_host>0.0.0.0</listen_host>
    <!-- NOTE(review): presumably keeps the server running when one of the listen addresses is unavailable; confirm. -->
    <listen_try>1</listen_try>

    <!--
    Optional logger override with the console flag set; disabled by default.
    <logger>
        <console>1</console>
    </logger>
    -->
</clickhouse>

users.yaml​

REST API​

Query​

docker_related_config.xml​

users.yaml

REST API

Query

docker_related_config.xml