Mysql主从和故障
2020-06-07 本文已影响0人
Odven
1) 搭建传统的mysql主从
(1) 主上面的操作
# 配置文件
cat > /etc/mysqld3380.cnf <<EOF
[mysqld]
user = mysql
bind-address = 0.0.0.0
port = 3380
basedir = /opt/mysql_process/mysql
datadir = /data/mysql/3380/data
socket = /data/mysql/3380/pid/mysqld.sock
pid-file = /data/mysql/3380/pid/mysqld.pid
log_error = /data/mysql/3380/log/error.log
# 开启双一操作
sync_binlog = 1
innodb_flush_log_at_trx_commit = 1
# 搭建主从
server_id = 50
log_bin = /data/mysql/3380/log/mysql_bin
binlog_format = row
[mysql]
socket = /data/mysql/3380/pid/mysqld.sock
EOF
# 创建用户并设置权限
create user repl@"192.168.137.%" identified by "123";
grant replication slave on *.* to repl@"192.168.137.%";
show grants for repl@"192.168.137.%"; # 查看权限
# 备份数据
mysqldump -S /data/mysql/3380/pid/mysqld.sock --master-data=2 --triggers -E -R --single-transaction -A > full.sql
(2) 从上面的操作
# 配置文件
cat > /etc/mysqld3381.cnf << EOF
[mysqld]
user = mysql
bind-address = 0.0.0.0
port = 3381
basedir = /opt/mysql_process/mysql
datadir = /data/mysql/3381/data
socket = /data/mysql/3381/pid/mysqld.sock
pid-file = /data/mysql/3381/pid/mysqld.pid
log_error = /data/mysql/3381/log/error.log
# 开启双一操作
sync_binlog = 1
innodb_flush_log_at_trx_commit = 1
# 搭建主从
server_id = 100
log_bin = /data/mysql/3381/log/mysql_bin
binlog_format = row
[mysql]
socket = /data/mysql/3381/pid/mysqld.sock
EOF
# 导入数据
mysql -S /data/mysql/3381/pid/mysqld.sock < full.sql
# 查看备份文件找到binlog文件名和position(大概在22行的位置)
head -30 full.sql
......
-- CHANGE MASTER TO MASTER_LOG_FILE='mysql_bin.000001', MASTER_LOG_POS=1940;
......
# 进入数据库执行CHANGE MASTER TO XXX语句就会把主的信息写入master.info文件中
CHANGE MASTER TO
MASTER_HOST='192.168.137.50',
MASTER_USER='repl',
MASTER_PASSWORD='123',
MASTER_PORT=3380,
MASTER_LOG_FILE='mysql_bin.000001',
MASTER_LOG_POS=1940,
MASTER_CONNECT_RETRY=10;
# 启动从的线程
start slave;
# 查看主从启动成功没有
show slave status\G;
......
Slave_IO_Running: Yes
Slave_SQL_Running: Yes
......
如果是Yes表示成功
2) 搭建GTID的mysql主从(建议使用基于GTID来搭建主从)
(1) 主上面的操作
# 配置文件
cat > /etc/mysqld3380.cnf <<EOF
[mysqld]
user = mysql
bind-address = 0.0.0.0
port = 3380
basedir = /opt/mysql_process/mysql
datadir = /data/mysql/3380/data
socket = /data/mysql/3380/pid/mysqld.sock
pid-file = /data/mysql/3380/pid/mysqld.pid
log_error = /data/mysql/3380/log/error.log
# 开启双一操作
sync_binlog = 1
innodb_flush_log_at_trx_commit = 1
# 启动binlog
server_id = 50
log_bin = /data/mysql/3380/log/mysql_bin
binlog_format = row
# 启动GTID
gtid-mode = on
enforce-gtid-consistency = true
log-slave-updates = 1
[mysql]
socket = /data/mysql/3380/pid/mysqld.sock
prompt=M [\\d]>
EOF
# 创建用户并设置权限
create user repl@"192.168.137.%" identified by "123";
grant replication slave on *.* to repl@"192.168.137.%";
show grants for repl@"192.168.137.%"; # 查看权限
# 备份数据
mysqldump -S /data/mysql/3380/pid/mysqld.sock --master-data=2 --triggers -E -R --single-transaction -A > full.sql
(2) 从上面的操作
# 配置文件
cat > /etc/mysqld3381.cnf << EOF
[mysqld]
user = mysql
bind-address = 0.0.0.0
port = 3381
basedir = /opt/mysql_process/mysql
datadir = /data/mysql/3381/data
socket = /data/mysql/3381/pid/mysqld.sock
pid-file = /data/mysql/3381/pid/mysqld.pid
log_error = /data/mysql/3381/log/error.log
# 开启双一操作
sync_binlog = 1
innodb_flush_log_at_trx_commit = 1
# 启动binlog
server_id = 100
log_bin = /data/mysql/3381/log/mysql_bin
binlog_format = row
# 启动GTID
gtid-mode = on
enforce-gtid-consistency = true
log-slave-updates = 1
[mysql]
socket = /data/mysql/3381/pid/mysqld.sock
prompt=M [\\d]>
EOF
# 导入数据
mysql -S /data/mysql/3381/pid/mysqld.sock < full.sql
# 进入数据库执行CHANGE MASTER TO XXX语句
CHANGE MASTER TO
MASTER_HOST='192.168.137.50',
MASTER_USER='repl',
MASTER_PASSWORD='123',
MASTER_PORT=3380,
MASTER_AUTO_POSITION=1;
# 启动从的线程
start slave;
# 查看主从启动成功没有
show slave status\G;
......
Slave_IO_Running: Yes
Slave_SQL_Running: Yes
......
如果是Yes表示成功
3) IO线程故障
# 从主上分析
1. binlog 被破坏
2. 执行了reset master;
3. 可能开启了防火墙限制,selinux
4. 连接数达到上限(增加连接数 set global max_connections=300;)
......
# 从从上分析
1. 写错了主的user或password或log_file或log_pos等
2. 可能开启了防火墙限制, selinux
......
4) SQL线程故障
1. relay_log被损坏,删除等
2. 在从库上面做了写操作等(show variables like '%read_only%'; 在配置文件添加read_only=1 或者再加上super_read_only=1)
.....
5) 主从延时
# 从主上分析
1. 有大的事物或并发事务量大
2. 有大的IO(自己都忙不过来,没空理会dump线程)
......
# 从从上分析
1. SQL线程值开启单线程
2. 很大的并发事物或大事物
3. 主从服务器性能差很多
4. 索引的差异
......
6) 查看问题和解决方法
1. show slave status\G; 查看具体的错误信息,然后做相应的处理
......
Last_IO_Errno: 0
Last_IO_Error:
......
2. 查看防火墙,网络
3. 在从上直接用同步的用户连接主
4. 查看binlog文件和位置等
5. change master to xxx
stop slave;
reset slave all;
change master to xxx;
start slave;
6. 跳过错误
stop slave;
set global sql_slave_skip_counter = 1;
start slave;
或
/etc/my.cnf
# 1007:对象已存在, 1032:要执行的对象不存在,无法执行DML, 1062:主键冲突,或约束冲突
slave-skip-errors = 1032,1062,1007
7. 重新做主从
8. 使用基于GTID的主从(这样可以使DUMP_T多并发,开启多个SQL_T进行多并发)