clickhouse集群搭建
2025-04-29 本文已影响0人
后知不觉1
1. 配置说明
| 名称 | ip | 说明 |
|---|---|---|
| node1 | 192.168.1.2 | |
| node2 | 192.168.1.3 | |
| node3 | 192.168.1.4 | |
2. 调整内核参数
# 设置文件数,线程数
ansible cluster -m shell -a "mv /etc/security/limits.d/20-nproc.conf /etc/security/20-nproc.confbak"
ansible cluster -m shell -a "cp /etc/security/limits.conf /etc/security/limits.confbak"
ansible cluster -m shell -a "echo -e '* soft nproc 524288\n* hard nproc 524288\n* soft nofile 524288\n* hard nofile 524288\n* soft sigpending 1030568\n* hard sigpending 1030568\n' >> /etc/security/limits.conf"
# 关闭swap
ansible cluster -mshell -a"sed -i '/swap/s/^/#/' /etc/fstab"
ansible cluster -mshell -a"swapoff -a"
3. zookeeper部署
注意部署zk时设置启动内存大小,配置文件
clientPort=2181
initLimit=10
autopurge.purgeInterval=24
syncLimit=5
tickTime=3000
dataDir=/data1/hadoop/zookeeper
autopurge.snapRetainCount=30
server.1=hwy-sfcloud-nn-dev-01.sfsscn.com:2888:3888
server.2=hwy-sfcloud-nn-dev-02.sfsscn.com:2888:3888
server.3=hwy-sfcloud-nn-dev-03.sfsscn.com:2888:3888
4. clickhouse部署
4.1 下载
阿里云上选择稳定版本的包,历史原因选择21.4.5.46的版本
https://mirrors.aliyun.com/clickhouse/tgz/stable/?spm=a2c6h.25603864.0.0.39c06dc5VbEXtV
wget https://mirrors.aliyun.com/clickhouse/tgz/stable/clickhouse-client-21.4.5.46.tgz
wget https://mirrors.aliyun.com/clickhouse/tgz/stable/clickhouse-common-static-21.4.5.46.tgz
wget https://mirrors.aliyun.com/clickhouse/tgz/stable/clickhouse-common-static-dbg-21.4.5.46.tgz
wget https://mirrors.aliyun.com/clickhouse/tgz/stable/clickhouse-server-21.4.5.46.tgz
4.2 解压安装
安装在/opt目录,当前下载在/opt/clickhouse目录
执行有顺序,会初始化环境,创建clickhouse用户,创建默认的配置目录
ansible cluster -mshell -a "ls /opt/clickhouse|xargs -i{} tar -xvf {} -C /opt/clickhouse/"
ansible cluster -mshell -a "bash /opt/clickhouse/clickhouse-common-static-21.4.5.46/install/doinst.sh"
ansible cluster -mshell -a "bash /opt/clickhouse/clickhouse-common-static-dbg-21.4.5.46/install/doinst.sh"
ansible cluster -mshell -a "bash /opt/clickhouse/clickhouse-server-21.4.5.46/install/doinst.sh"
ansible cluster -mshell -a "bash /opt/clickhouse/clickhouse-client-21.4.5.46/install/doinst.sh"
4.3 配置
这里配置两个集群
- default:3分片2副本,用于提高吞吐
- c3_all:1分片3副本,特殊的表用于关联查询
config.xml
<?xml version="1.0"?>
<yandex>
<logger>
<level>notice</level>
<log>/data01/clickhouse/clickhouse-server.log</log>
<errorlog>/data01/clickhouse/clickhouse-server.err.log</errorlog>
<size>1024M</size>
<count>10</count>
</logger>
<listen_host>0.0.0.0</listen_host>
<http_port>8123</http_port>
<tcp_port>9000</tcp_port>
<interserver_http_port>9009</interserver_http_port>
<max_connections>4096</max_connections>
<keep_alive_timeout>3</keep_alive_timeout>
<max_session_timeout>3600</max_session_timeout>
<default_session_timeout>60</default_session_timeout>
<max_concurrent_queries>600</max_concurrent_queries>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<storage_configuration>
<disks>
<!-- 默认磁盘配置 -->
<default>
<path>/var/lib/clickhouse/</path>
</default>
<!-- 附加磁盘1 -->
<disk1>
<path>/data01/clickhouse_disk1/</path>
<keep_free_space_bytes>2147483648</keep_free_space_bytes> <!-- 保留2GB空闲空间 -->
</disk1>
<!-- 附加磁盘2 -->
<disk2>
<path>/data02/clickhouse_disk2/</path>
<keep_free_space_bytes>2147483648</keep_free_space_bytes> <!-- 保留2GB空闲空间 -->
</disk2>
</disks>
<policies>
<!-- 轮询策略 -->
<round_robin>
<volumes>
<default>
<disk>default</disk>
<disk>disk1</disk>
<disk>disk2</disk>
</default>
</volumes>
</round_robin>
<!-- 热冷数据分层策略 -->
<tiered>
<volumes>
<hot>
<!-- default库是高速盘 -->
<disk>default</disk>
<max_data_part_size_bytes>1073741824</max_data_part_size_bytes> <!-- 1GB以下存热数据 -->
</hot>
<cold>
<disk>disk1</disk>
<disk>disk2</disk>
</cold>
</volumes>
</tiered>
</policies>
</storage_configuration>
<!-- 与storage_configuration冲突优先级更高 -->
<path>/data/clickhouse</path>
<tmp_path>/data01/clickhouse/tmp_data/</tmp_path>
<user_files_path>/data/clickhouse/user_files/</user_files_path>
<format_schema_path>/data/clickhouse/format_schemas/</format_schema_path>
<!-- 尽管同层级没有users相关配置,也会自动查找config.d、conf.d下的配置 -->
<users_config>users.xml</users_config>
<default_profile>default</default_profile>
<system_profile>default</system_profile>
<default_database>default</default_database>
<distributed_ddl>
<path>/clickhouse/basic_monitor/task_queue/ddl</path>
</distributed_ddl>
<builtin_dictionaries_reload_interval>3600</builtin_dictionaries_reload_interval>
<dictionaries_config>*_dictionary.xml</dictionaries_config>
<max_table_size_to_drop>53687091200</max_table_size_to_drop>
<max_partition_size_to_drop>0</max_partition_size_to_drop>
<query_log>
<database>system</database>
<table>query_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
<merge_tree>
<use_minimalistic_part_header_in_zookeeper>1</use_minimalistic_part_header_in_zookeeper>
<parts_to_delay_insert>300</parts_to_delay_insert>
<parts_to_throw_insert>600</parts_to_throw_insert>
<max_delay_to_insert>2</max_delay_to_insert>
<max_part_loading_threads>48</max_part_loading_threads>
</merge_tree>
<prometheus>
<endpoint>/metrics</endpoint>
<port>8001</port>
<metrics>true</metrics>
<events>true</events>
<asynchronous_metrics>true</asynchronous_metrics>
</prometheus>
<mlock_executable>true</mlock_executable>
<remote_servers incl="clickhouse_remote_servers"/>
<include_from>/etc/clickhouse-server/config.d/cluster.xml</include_from>
<!-- 尽管同层级没有zk相关配置,也会自动查找config.d、conf.d下的配置 -->
<zookeeper incl="zookeeper-servers" optional="true"/>
<!-- 尽管同层级没有macros相关配置,也会自动查找config.d、conf.d下的配置 -->
<macros incl="macros" optional="true"/>
<compression incl="clickhouse_compression" optional="true"/>
</yandex>
config.d/cluster.xml
<yandex>
<clickhouse_remote_servers>
<default>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>node1</host>
<port>9000</port>
<user>admin</user>
<password>123</password>
</replica>
<replica>
<host>node2</host>
<port>9000</port>
<user>admin</user>
<password>123</password>
</replica>
</shard>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>node2</host>
<port>9000</port>
<user>admin</user>
<password>123</password>
</replica>
<replica>
<host>node3</host>
<port>9000</port>
<user>admin</user>
<password>123</password>
</replica>
</shard>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>node1</host>
<port>9000</port>
<user>admin</user>
<password>123</password>
</replica>
<replica>
<host>node3</host>
<port>9000</port>
<user>admin</user>
<password>123</password>
</replica>
</shard>
</default>
<c3_all>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>node1</host>
<port>9000</port>
<user>admin</user>
<password>123</password>
</replica>
<replica>
<host>node2</host>
<port>9000</port>
<user>admin</user>
<password>123</password>
</replica>
<replica>
<host>node3</host>
<port>9000</port>
<user>admin</user>
<password>123</password>
</replica>
</shard>
</c3_all>
</clickhouse_remote_servers>
</yandex>
config.d/user.xml
<?xml version="1.0"?>
<yandex>
<profiles>
<default>
<max_memory_usage>21474836480</max_memory_usage>
<max_threads>4</max_threads>
<distributed_product_mode>local</distributed_product_mode>
<max_partitions_per_insert_block>10000</max_partitions_per_insert_block>
<log_queries>1</log_queries>
<use_uncompressed_cache>1</use_uncompressed_cache>
<load_balancing>random</load_balancing>
<poll_interval>1</poll_interval>
<idle_connection_timeout>2</idle_connection_timeout>
<max_execution_time>600</max_execution_time>
<input_format_skip_unknown_fields>1</input_format_skip_unknown_fields>
<send_progress_in_http_headers>0</send_progress_in_http_headers>
<insert_allow_materialized_columns>1</insert_allow_materialized_columns>
<distributed_ddl_task_timeout>300</distributed_ddl_task_timeout>
<os_thread_priority>-3</os_thread_priority>
<background_pool_size>10</background_pool_size>
<replication_alter_partitions_sync>2</replication_alter_partitions_sync>
</default>
<readonly>
<readonly>2</readonly>
</readonly>
<ts>
<max_query_size>1000000</max_query_size>
</ts>
</profiles>
<!-- Users and ACL. -->
<users>
<default>
<password>123</password>
<networks incl="networks" replace="replace">
<ip>0.0.0.0/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
</default>
<admin>
<password>123</password>
<networks incl="networks" replace="replace">
<ip>0.0.0.0/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
</admin>
</users>
<quotas>
<default>
<interval>
<duration>3600</duration>
<queries>0</queries>
<errors>0</errors>
<result_rows>0</result_rows>
<read_rows>0</read_rows>
<execution_time>0</execution_time>
</interval>
</default>
</quotas>
</yandex>
config.d/macros.xml
宏每台机器上配置是不一样的
按照上述集群宏分布应该如下
| 集群名称 | node1(分片 副本) | node2(分片 副本) | node3(分片 副本) | 备注 |
|---|---|---|---|---|
| default | 01 01,03 01 | 01 02,02 01 | 02 02,03 02 | |
| c3_all | 01 01 | 01 02 | 01 03 | |
<yandex>
<macros>
<shard>01</shard>
<replica>02</replica>
<c3_all_replica>02</c3_all_replica>
<c3_all_shard>all</c3_all_shard>
</macros>
</yandex>
config.d/zk.xml
<yandex>
<zookeeper>
<node>
<host>node1</host>
<port>2181</port>
</node>
<node>
<host>node2</host>
<port>2181</port>
</node>
<node>
<host>node3</host>
<port>2181</port>
</node>
<operation_timeout_ms>10000</operation_timeout_ms>
</zookeeper>
</yandex>
5. 启动clickhouse
ansible cluster -mshell -a "systemctl restart clickhouse-server"
6 验证
6.1、分布式库验证
# 登录
clickhouse-client --host node1 --port 9000 --user admin --password 123
# 创建分布式库
CREATE DATABASE test123 ON CLUSTER 'default';
# 登录 另一台
clickhouse-client --host node2 --port 9000 --user admin --password 123
# 查看库
show databases;
#能查看到则正常
6.2、分布式表验证
# 登录
clickhouse-client --host node1 --port 9000 --user admin --password 123
# 创建本地表
CREATE TABLE test123.test2_local_table ON CLUSTER 'default' (id UInt32,event_date Date,value String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/{table}/{shard}', '{replica}') ORDER BY (id) SETTINGS index_granularity = 8192;
CREATE TABLE test123.test2_table ON CLUSTER 'default'
ENGINE = Distributed(default,test123,test2_local_table,rand());
# 登录 另一台
clickhouse-client --host node2 --port 9000 --user admin --password 123
# 查看库
use test123;
show tables;
#能查看到则正常
6.3、创建全副本表验证
# 登录
clickhouse-client --host node1 --port 9000 --user admin --password 123
# 创建本地表
CREATE TABLE test123.test123_local_table ON CLUSTER 'c3_all' (id UInt32,event_date Date,value String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/{table}/{c3_all_shard}', '{c3_all_replica}') ORDER BY (id);
INSERT INTO test123.test123_local_table VALUES (1, '2023-01-01', 'value1'),(2, '2023-01-02', 'value2');
# 登录 另一台
clickhouse-client --host node2 --port 9000 --user admin --password 123
# 查看库
select * from test123.test123_local_table
#能查看到则正常