it运维开发教程,运维管理,因为微不足道,所以值得关注!域名来源:sos 救援 idc 数据中心 所以sos+idc=sosidc.com

mysql高可用mha解决方案03-去哪网采用的

发布:admin2016-3-4 16:25分类: 解决方案

2.安装部署MHA

2.1安装MHA node(在所有Mysql服务器上安装)

1)安装依赖包

  rpm -Uvh http://dl.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm

rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-EPEL-6

yum -y install perl-DBD-MySQL perl-Config-Tiny perl-Log-Dispatch perl-Parallel-ForkManager perl-Config-IniFiles perl-Time-HiRes  perl-Time-HiRes  perl-CPAN

 

2)在所有的节点上安装mha node:

tar  zxvf  mha4mysql-node-0.56.tar.gz

cd  mha4mysql-node-0.56

perl  Makefile.PL

make

make install

 

2.2.安装MHA Manager

MHA Manager中主要包括了几个管理员的命令行工具,例如masterha_manager、masterha_master_switch等。

(1)       安装依赖包

rpm -Uvh http://dl.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm

rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-EPEL-6

yum -y install perl-DBD-MySQL perl-Config-Tiny perl-Log-Dispatch perl-Parallel-ForkManager perl-Config-IniFiles perl-Time-HiRes  perl-Time-HiRes  perl-CPAN

(2)       安装MHA node软件包。注意,在MHA Manager的主机上也要安装MHA node。

tar  zxvf  mha4mysql-node-0.56.tar.gz

cd  mha4mysql-node-0.56

perl  Makefile.PL

make

make install

(3)       安装MHA Manager软件包。

tar zxvf mha4mysql-manager-0.56.tar.gz

cd mha4mysql-manager-0.56

perl Makefile.PL

make

make install

 

2.3. 配置SSH 登录无密码验证

(1)       manager 192.168.127.100上配置到所有节点的无密码验证

ssh-keygen -t rsa

ssh-copy-id -i ~/.ssh/id_rsa.pub root@MHA

ssh-copy-id -i ~/.ssh/id_rsa.pub root@master

ssh-copy-id -i ~/.ssh/id_rsa.pub root@slave01

ssh-copy-id -i ~/.ssh/id_rsa.pub root@slave02

(2)       MHA Node master(192.168.127.101)上:

 ssh-keygen -t rsa

 ssh-copy-id -i ~/.ssh/id_rsa.pub root@MHA

ssh-copy-id -i ~/.ssh/id_rsa.pub root@master

ssh-copy-id -i ~/.ssh/id_rsa.pub root@slave01

     ssh-copy-id -i ~/.ssh/id_rsa.pub root@slave02

(3)       MHA Node slave01(192.168.127.102)上:

 ssh-keygen -t rsa

ssh-copy-id -i ~/.ssh/id_rsa.pub root@MHA

 ssh-copy-id -i ~/.ssh/id_rsa.pub root@master

ssh-copy-id -i ~/.ssh/id_rsa.pub root@slave01

     ssh-copy-id -i ~/.ssh/id_rsa.pub root@slave02

(4)       MHA Node slave02(192.168.127.103)上:

 ssh-keygen -t rsa

ssh-copy-id -i ~/.ssh/id_rsa.pub root@MHA

 ssh-copy-id -i ~/.ssh/id_rsa.pub root@master

     ssh-copy-id -i ~/.ssh/id_rsa.pub root@slave01

ssh-copy-id -i ~/.ssh/id_rsa.pub root@slave02

 

   在每台做以下步骤

ln -s /app/mysql5.6/bin/* /usr/local/bin/

把脚本拷贝相关目录

[root@MHA ~]# cp /root/mha4mysql-manager-0.56/samples/scripts/master_ip_failover /usr/local/bin/

 

[root@MHA ~]# cp /root/mha4mysql-manager-0.56/samples/scripts/master_ip_online_change /usr/local/bin/

 

[root@MHA ~]# cp /root/mha4mysql-manager-0.56/samples/scripts/send_report  /usr/local/bin/

 

[root@MHA ~]# cp /root/mha4mysql-manager-0.56/bin/masterha_secondary_check /usr/bin/

3.配置MHA

配置MHA的步骤如下。

(1)       创建MHA工作目录,并且创建相关配置文件:

mkdir -p /etc/masterha

mkdir -p /masterha/app1

      配置如下

vi /etc/masterha/app1.cnf

[server default]

manager_workdir=/masterha/app1

manager_log=/masterha/app1/app1.log

master_ip_failover_script=/usr/local/bin/master_ip_failover

master_ip_online_change_script=/usr/local/bin/master_ip_online_change

 

user=root

password=123456

ssh_user=root

repl_user=repl

repl_password=repl

ping_interval=1

remote_workdir=/tmp

report_script=/usr/local/bin/send_report

secondary_check_script=/usr/bin/masterha_secondary_check  -s MHA -s slave02  --user=root --master_host=master --master_ip=192.168.127.101 --master_port=3306 --password=123456

shutdown_script=""

report_script=""

 

 

[server1]

hostname=192.168.127.101

master_binlog_dir=/data/mysql3306

candidate_master=1

[server2]

hostname=192.168.127.102

master_binlog_dir=/data/mysql3306

candidate_master=1

check_repl_delay=0

 

[server3]

hostname=192.168.127.103

master_binlog_dir=/data/mysql3306

no_master=1

 

4.检查SSH的配置

检查MHA Manager到所有MHA node的SSH连接状态:

[root@MHA ~]# masterha_check_ssh --conf=/etc/masterha/app1.cnf

Wed Mar  2 19:03:30 2016 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.

Wed Mar  2 19:03:30 2016 - [info] Reading application default configuration from /etc/masterha/app1.cnf..

Wed Mar  2 19:03:30 2016 - [info] Reading server configuration from /etc/masterha/app1.cnf..

Wed Mar  2 19:03:30 2016 - [info] Starting SSH connection tests..

Wed Mar  2 19:03:31 2016 - [debug]

Wed Mar  2 19:03:30 2016 - [debug]  Connecting via SSH from root@192.168.127.101(192.168.127.101:22) to root@192.168.127.102(192.168.127.102:22)..

Wed Mar  2 19:03:30 2016 - [debug]   ok.

Wed Mar  2 19:03:30 2016 - [debug]  Connecting via SSH from root@192.168.127.101(192.168.127.101:22) to root@192.168.127.103(192.168.127.103:22)..

Wed Mar  2 19:03:30 2016 - [debug]   ok.

Wed Mar  2 19:03:31 2016 - [debug]

Wed Mar  2 19:03:30 2016 - [debug]  Connecting via SSH from root@192.168.127.102(192.168.127.102:22) to root@192.168.127.101(192.168.127.101:22)..

Wed Mar  2 19:03:31 2016 - [debug]   ok.

Wed Mar  2 19:03:31 2016 - [debug]  Connecting via SSH from root@192.168.127.102(192.168.127.102:22) to root@192.168.127.103(192.168.127.103:22)..

Wed Mar  2 19:03:31 2016 - [debug]   ok.

Wed Mar  2 19:03:32 2016 - [debug]

Wed Mar  2 19:03:31 2016 - [debug]  Connecting via SSH from root@192.168.127.103(192.168.127.103:22) to root@192.168.127.101(192.168.127.101:22)..

Wed Mar  2 19:03:31 2016 - [debug]   ok.

Wed Mar  2 19:03:31 2016 - [debug]  Connecting via SSH from root@192.168.127.103(192.168.127.103:22) to root@192.168.127.102(192.168.127.102:22)..

Wed Mar  2 19:03:32 2016 - [debug]   ok.

Wed Mar  2 19:03:32 2016 - [info] All SSH connection tests passed successfully.

5.检查整个复制环境

[root@MHA ~]# masterha_check_ssh --conf=/etc/masterha/app1.cnf

Wed Mar  2 19:03:30 2016 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.

Wed Mar  2 19:03:30 2016 - [info] Reading application default configuration from /etc/masterha/app1.cnf..

Wed Mar  2 19:03:30 2016 - [info] Reading server configuration from /etc/masterha/app1.cnf..

Wed Mar  2 19:03:30 2016 - [info] Starting SSH connection tests..

Wed Mar  2 19:03:31 2016 - [debug]

Wed Mar  2 19:03:30 2016 - [debug]  Connecting via SSH from root@192.168.127.101(192.168.127.101:22) to root@192.168.127.102(192.168.127.102:22)..

Wed Mar  2 19:03:30 2016 - [debug]   ok.

Wed Mar  2 19:03:30 2016 - [debug]  Connecting via SSH from root@192.168.127.101(192.168.127.101:22) to root@192.168.127.103(192.168.127.103:22)..

Wed Mar  2 19:03:30 2016 - [debug]   ok.

Wed Mar  2 19:03:31 2016 - [debug]

Wed Mar  2 19:03:30 2016 - [debug]  Connecting via SSH from root@192.168.127.102(192.168.127.102:22) to root@192.168.127.101(192.168.127.101:22)..

Wed Mar  2 19:03:31 2016 - [debug]   ok.

Wed Mar  2 19:03:31 2016 - [debug]  Connecting via SSH from root@192.168.127.102(192.168.127.102:22) to root@192.168.127.103(192.168.127.103:22)..

Wed Mar  2 19:03:31 2016 - [debug]   ok.

Wed Mar  2 19:03:32 2016 - [debug]

Wed Mar  2 19:03:31 2016 - [debug]  Connecting via SSH from root@192.168.127.103(192.168.127.103:22) to root@192.168.127.101(192.168.127.101:22)..

Wed Mar  2 19:03:31 2016 - [debug]   ok.

Wed Mar  2 19:03:31 2016 - [debug]  Connecting via SSH from root@192.168.127.103(192.168.127.103:22) to root@192.168.127.102(192.168.127.102:22)..

Wed Mar  2 19:03:32 2016 - [debug]   ok.

Wed Mar  2 19:03:32 2016 - [info] All SSH connection tests passed successfully.

[root@MHA ~]# masterha_check_repl --conf=/etc/masterha/app1.cnf

Wed Mar  2 19:04:12 2016 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.

Wed Mar  2 19:04:12 2016 - [info] Reading application default configuration from /etc/masterha/app1.cnf..

Wed Mar  2 19:04:12 2016 - [info] Reading server configuration from /etc/masterha/app1.cnf..

Wed Mar  2 19:04:12 2016 - [info] MHA::MasterMonitor version 0.56.

Wed Mar  2 19:04:12 2016 - [info] GTID failover mode = 0

Wed Mar  2 19:04:12 2016 - [info] Dead Servers:

Wed Mar  2 19:04:12 2016 - [info] Alive Servers:

Wed Mar  2 19:04:12 2016 - [info]   192.168.127.101(192.168.127.101:3306)

Wed Mar  2 19:04:12 2016 - [info]   192.168.127.102(192.168.127.102:3306)

Wed Mar  2 19:04:12 2016 - [info]   192.168.127.103(192.168.127.103:3306)

Wed Mar  2 19:04:12 2016 - [info] Alive Slaves:

Wed Mar  2 19:04:12 2016 - [info]   192.168.127.102(192.168.127.102:3306)  Version=5.6.27-75.0-log (oldest major version between slaves) log-bin:enabled

Wed Mar  2 19:04:12 2016 - [info]     Replicating from 192.168.127.101(192.168.127.101:3306)

Wed Mar  2 19:04:12 2016 - [info]     Primary candidate for the new Master (candidate_master is set)

Wed Mar  2 19:04:12 2016 - [info]   192.168.127.103(192.168.127.103:3306)  Version=5.6.27-75.0-log (oldest major version between slaves) log-bin:enabled

Wed Mar  2 19:04:12 2016 - [info]     Replicating from 192.168.127.101(192.168.127.101:3306)

Wed Mar  2 19:04:12 2016 - [info]     Not candidate for the new Master (no_master is set)

Wed Mar  2 19:04:12 2016 - [info] Current Alive Master: 192.168.127.101(192.168.127.101:3306)

Wed Mar  2 19:04:12 2016 - [info] Checking slave configurations..

Wed Mar  2 19:04:12 2016 - [info] Checking replication filtering settings..

Wed Mar  2 19:04:12 2016 - [info]  binlog_do_db= , binlog_ignore_db=

Wed Mar  2 19:04:12 2016 - [info]  Replication filtering check ok.

Wed Mar  2 19:04:12 2016 - [info] GTID (with auto-pos) is not supported

Wed Mar  2 19:04:12 2016 - [info] Starting SSH connection tests..

Wed Mar  2 19:04:14 2016 - [info] All SSH connection tests passed successfully.

Wed Mar  2 19:04:14 2016 - [info] Checking MHA Node version..

Wed Mar  2 19:04:15 2016 - [info]  Version check ok.

Wed Mar  2 19:04:15 2016 - [info] Checking SSH publickey authentication settings on the current master..

Wed Mar  2 19:04:15 2016 - [info] HealthCheck: SSH to 192.168.127.101 is reachable.

Wed Mar  2 19:04:15 2016 - [info] Master MHA Node version is 0.56.

Wed Mar  2 19:04:15 2016 - [info] Checking recovery script configurations on 192.168.127.101(192.168.127.101:3306)..

Wed Mar  2 19:04:15 2016 - [info]   Executing command: save_binary_logs --command=test --start_pos=4 --binlog_dir=/data/mysql3306 --output_file=/tmp/save_binary_logs_test --manager_version=0.56 --start_file=mysql-bin.000004

Wed Mar  2 19:04:15 2016 - [info]   Connecting to root@192.168.127.101(192.168.127.101:22)..

  Creating /tmp if not exists..    ok.

  Checking output directory is accessible or not..

   ok.

  Binlog found at /data/mysql3306, up to mysql-bin.000004

Wed Mar  2 19:04:15 2016 - [info] Binlog setting check done.

Wed Mar  2 19:04:15 2016 - [info] Checking SSH publickey authentication and checking recovery script configurations on all alive slave servers..

Wed Mar  2 19:04:15 2016 - [info]   Executing command : apply_diff_relay_logs --command=test --slave_user='root' --slave_host=192.168.127.102 --slave_ip=192.168.127.102 --slave_port=3306 --workdir=/tmp --target_version=5.6.27-75.0-log --manager_version=0.56 --relay_log_info=/data/mysql3306/relay-log.info  --relay_dir=/data/mysql3306/  --slave_pass=xxx

Wed Mar  2 19:04:15 2016 - [info]   Connecting to root@192.168.127.102(192.168.127.102:22)..

  Checking slave recovery environment settings..

    Opening /data/mysql3306/relay-log.info ... ok.

    Relay log found at /data/mysql3306, up to mysqld-relay-bin.000002

    Temporary relay log file is /data/mysql3306/mysqld-relay-bin.000002

    Testing mysql connection and privileges..Warning: Using a password on the command line interface can be insecure.

 done.

    Testing mysqlbinlog output.. done.

    Cleaning up test file(s).. done.

Wed Mar  2 19:04:16 2016 - [info]   Executing command : apply_diff_relay_logs --command=test --slave_user='root' --slave_host=192.168.127.103 --slave_ip=192.168.127.103 --slave_port=3306 --workdir=/tmp --target_version=5.6.27-75.0-log --manager_version=0.56 --relay_log_info=/data/mysql3306/relay-log.info  --relay_dir=/data/mysql3306/  --slave_pass=xxx

Wed Mar  2 19:04:16 2016 - [info]   Connecting to root@192.168.127.103(192.168.127.103:22)..

  Checking slave recovery environment settings..

    Opening /data/mysql3306/relay-log.info ... ok.

    Relay log found at /data/mysql3306, up to mysqld-relay-bin.000002

    Temporary relay log file is /data/mysql3306/mysqld-relay-bin.000002

    Testing mysql connection and privileges..Warning: Using a password on the command line interface can be insecure.

 done.

    Testing mysqlbinlog output.. done.

    Cleaning up test file(s).. done.

Wed Mar  2 19:04:16 2016 - [info] Slaves settings check done.

Wed Mar  2 19:04:16 2016 - [info]


 +--192.168.127.102(192.168.127.102:3306)

 +--192.168.127.103(192.168.127.103:3306)

Wed Mar  2 19:04:16 2016 - [info] Checking replication health on 192.168.127.102..

Wed Mar  2 19:04:16 2016 - [info]  ok.

Wed Mar  2 19:04:16 2016 - [info] Checking replication health on 192.168.127.103..

Wed Mar  2 19:04:16 2016 - [info]  ok.

Wed Mar  2 19:04:16 2016 - [info] Checking master_ip_failover_script status:

Wed Mar  2 19:04:16 2016 - [info]   /usr/local/bin/master_ip_failover --command=status --ssh_user=root --orig_master_host=192.168.127.101 --orig_master_ip=192.168.127.101 --orig_master_port=3306

Bareword "FIXME_xxx" not allowed while "strict subs" in use at /usr/local/bin/master_ip_failover line 93.

Execution of /usr/local/bin/master_ip_failover aborted due to compilation errors.

Wed Mar  2 19:04:16 2016 - [error][/usr/local/share/perl5/MHA/MasterMonitor.pm, ln226]  Failed to get master_ip_failover_script status with return code 255:0.

Wed Mar  2 19:04:16 2016 - [error][/usr/local/share/perl5/MHA/MasterMonitor.pm, ln424] Error happened on checking configurations.  at /usr/local/bin/masterha_check_repl line 48

Wed Mar  2 19:04:16 2016 - [error][/usr/local/share/perl5/MHA/MasterMonitor.pm, ln523] Error happened on monitoring servers.

Wed Mar  2 19:04:16 2016 - [info] Got exit code 1 (Not master dead).

MySQL Replication Health is NOT OK!

以上输出说明检查没有通过,需要先修复上面报出的问题:

将 /usr/local/bin/master_ip_failover 第93行的 FIXME_xxx; 注释掉(改为 #FIXME_xxx;),然后重新执行检查:

[root@MHA ~]# masterha_check_repl --conf=/etc/masterha/app1.cnf

Wed Mar  2 19:04:52 2016 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.

Wed Mar  2 19:04:52 2016 - [info] Reading application default configuration from /etc/masterha/app1.cnf..

Wed Mar  2 19:04:52 2016 - [info] Reading server configuration from /etc/masterha/app1.cnf..

Wed Mar  2 19:04:52 2016 - [info] MHA::MasterMonitor version 0.56.

Wed Mar  2 19:04:52 2016 - [info] GTID failover mode = 0

Wed Mar  2 19:04:52 2016 - [info] Dead Servers:

Wed Mar  2 19:04:52 2016 - [info] Alive Servers:

Wed Mar  2 19:04:52 2016 - [info]   192.168.127.101(192.168.127.101:3306)

Wed Mar  2 19:04:52 2016 - [info]   192.168.127.102(192.168.127.102:3306)

Wed Mar  2 19:04:52 2016 - [info]   192.168.127.103(192.168.127.103:3306)

Wed Mar  2 19:04:52 2016 - [info] Alive Slaves:

Wed Mar  2 19:04:52 2016 - [info]   192.168.127.102(192.168.127.102:3306)  Version=5.6.27-75.0-log (oldest major version between slaves) log-bin:enabled

Wed Mar  2 19:04:52 2016 - [info]     Replicating from 192.168.127.101(192.168.127.101:3306)

Wed Mar  2 19:04:52 2016 - [info]     Primary candidate for the new Master (candidate_master is set)

Wed Mar  2 19:04:52 2016 - [info]   192.168.127.103(192.168.127.103:3306)  Version=5.6.27-75.0-log (oldest major version between slaves) log-bin:enabled

Wed Mar  2 19:04:52 2016 - [info]     Replicating from 192.168.127.101(192.168.127.101:3306)

Wed Mar  2 19:04:52 2016 - [info]     Not candidate for the new Master (no_master is set)

Wed Mar  2 19:04:52 2016 - [info] Current Alive Master: 192.168.127.101(192.168.127.101:3306)

Wed Mar  2 19:04:52 2016 - [info] Checking slave configurations..

Wed Mar  2 19:04:52 2016 - [info] Checking replication filtering settings..

Wed Mar  2 19:04:52 2016 - [info]  binlog_do_db= , binlog_ignore_db=

Wed Mar  2 19:04:52 2016 - [info]  Replication filtering check ok.

Wed Mar  2 19:04:52 2016 - [info] GTID (with auto-pos) is not supported

Wed Mar  2 19:04:52 2016 - [info] Starting SSH connection tests..

Wed Mar  2 19:04:54 2016 - [info] All SSH connection tests passed successfully.

Wed Mar  2 19:04:54 2016 - [info] Checking MHA Node version..

Wed Mar  2 19:04:54 2016 - [info]  Version check ok.

Wed Mar  2 19:04:54 2016 - [info] Checking SSH publickey authentication settings on the current master..

Wed Mar  2 19:04:54 2016 - [info] HealthCheck: SSH to 192.168.127.101 is reachable.

Wed Mar  2 19:04:55 2016 - [info] Master MHA Node version is 0.56.

Wed Mar  2 19:04:55 2016 - [info] Checking recovery script configurations on 192.168.127.101(192.168.127.101:3306)..

Wed Mar  2 19:04:55 2016 - [info]   Executing command: save_binary_logs --command=test --start_pos=4 --binlog_dir=/data/mysql3306 --output_file=/tmp/save_binary_logs_test --manager_version=0.56 --start_file=mysql-bin.000004

Wed Mar  2 19:04:55 2016 - [info]   Connecting to root@192.168.127.101(192.168.127.101:22)..

  Creating /tmp if not exists..    ok.

  Checking output directory is accessible or not..

   ok.

  Binlog found at /data/mysql3306, up to mysql-bin.000004

Wed Mar  2 19:04:55 2016 - [info] Binlog setting check done.

Wed Mar  2 19:04:55 2016 - [info] Checking SSH publickey authentication and checking recovery script configurations on all alive slave servers..

Wed Mar  2 19:04:55 2016 - [info]   Executing command : apply_diff_relay_logs --command=test --slave_user='root' --slave_host=192.168.127.102 --slave_ip=192.168.127.102 --slave_port=3306 --workdir=/tmp --target_version=5.6.27-75.0-log --manager_version=0.56 --relay_log_info=/data/mysql3306/relay-log.info  --relay_dir=/data/mysql3306/  --slave_pass=xxx

Wed Mar  2 19:04:55 2016 - [info]   Connecting to root@192.168.127.102(192.168.127.102:22)..

  Checking slave recovery environment settings..

    Opening /data/mysql3306/relay-log.info ... ok.

    Relay log found at /data/mysql3306, up to mysqld-relay-bin.000002

    Temporary relay log file is /data/mysql3306/mysqld-relay-bin.000002

    Testing mysql connection and privileges..Warning: Using a password on the command line interface can be insecure.

 done.

    Testing mysqlbinlog output.. done.

    Cleaning up test file(s).. done.

Wed Mar  2 19:04:55 2016 - [info]   Executing command : apply_diff_relay_logs --command=test --slave_user='root' --slave_host=192.168.127.103 --slave_ip=192.168.127.103 --slave_port=3306 --workdir=/tmp --target_version=5.6.27-75.0-log --manager_version=0.56 --relay_log_info=/data/mysql3306/relay-log.info  --relay_dir=/data/mysql3306/  --slave_pass=xxx

Wed Mar  2 19:04:55 2016 - [info]   Connecting to root@192.168.127.103(192.168.127.103:22)..

  Checking slave recovery environment settings..

    Opening /data/mysql3306/relay-log.info ... ok.

    Relay log found at /data/mysql3306, up to mysqld-relay-bin.000002

    Temporary relay log file is /data/mysql3306/mysqld-relay-bin.000002

    Testing mysql connection and privileges..Warning: Using a password on the command line interface can be insecure.

 done.

    Testing mysqlbinlog output.. done.

    Cleaning up test file(s).. done.

Wed Mar  2 19:04:55 2016 - [info] Slaves settings check done.

Wed Mar  2 19:04:55 2016 - [info]


 +--192.168.127.102(192.168.127.102:3306)

 +--192.168.127.103(192.168.127.103:3306)

Wed Mar  2 19:04:55 2016 - [info] Checking replication health on 192.168.127.102..

Wed Mar  2 19:04:55 2016 - [info]  ok.

Wed Mar  2 19:04:55 2016 - [info] Checking replication health on 192.168.127.103..

Wed Mar  2 19:04:55 2016 - [info]  ok.

Wed Mar  2 19:04:55 2016 - [info] Checking master_ip_failover_script status:

Wed Mar  2 19:04:55 2016 - [info]   /usr/local/bin/master_ip_failover --command=status --ssh_user=root --orig_master_host=192.168.127.101 --orig_master_ip=192.168.127.101 --orig_master_port=3306

Wed Mar  2 19:04:55 2016 - [info]  OK.

Wed Mar  2 19:04:55 2016 - [warning] shutdown_script is not defined.

Wed Mar  2 19:04:55 2016 - [info] Got exit code 0 (Not master dead).

MySQL Replication Health is OK.

说明成功

6.通过脚本管理 VIP

修改master_ip_failover文件(位于/usr/local/bin目录),内容如下:

 

#!/usr/bin/env perl

 

#  Copyright (C) 2011 DeNA Co.,Ltd.

#

#  This program is free software; you can redistribute it and/or modify

#  it under the terms of the GNU General Public License as published by

#  the Free Software Foundation; either version 2 of the License, or

#  (at your option) any later version.

#

#  This program is distributed in the hope that it will be useful,

#  but WITHOUT ANY WARRANTY; without even the implied warranty of

#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

#  GNU General Public License for more details.

#

#  You should have received a copy of the GNU General Public License

#   along with this program; if not, write to the Free Software

#  Foundation, Inc.,

#  51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

 

## Note: This is a sample script and is not complete. Modify the script based on your environment.

 

use strict;
use warnings FATAL => 'all';
use Getopt::Long;

# Command-line values. Each one is filled in by GetOptions below and stays
# undefined when the corresponding option is not given.
my $command;
my $ssh_user;
my ( $orig_master_host, $orig_master_ip, $orig_master_port );
my ( $new_master_host,  $new_master_ip,  $new_master_port );

# Virtual IP (address/prefix length) and the eth0 alias number it is bound to.
my $vip = '192.168.127.202/24';
my $key = "2";

# Commands that start_vip() / stop_vip() run on the remote host over ssh.
my $ssh_start_vip = "/sbin/ifconfig eth0:$key $vip";
my $ssh_stop_vip  = "/sbin/ifconfig eth0:$key down";

# Option spec => destination variable ("=s" takes a string, "=i" an integer).
my @option_spec = (
  'command=s'          => \$command,
  'ssh_user=s'         => \$ssh_user,
  'orig_master_host=s' => \$orig_master_host,
  'orig_master_ip=s'   => \$orig_master_ip,
  'orig_master_port=i' => \$orig_master_port,
  'new_master_host=s'  => \$new_master_host,
  'new_master_ip=s'    => \$new_master_ip,
  'new_master_port=i'  => \$new_master_port,
);

# The result of GetOptions is intentionally not checked (same as before), so
# options this script does not know about do not stop it from running.
GetOptions(@option_spec);

# Hand control to the dispatcher; its result is used as the exit status.
exit &main();

 

# Dispatch on the --command option and terminate the process with the
# matching exit status. Every branch calls exit, so this sub never returns.
#
#   stop / stopssh : call stop_vip() for the old master;
#                    exit 0, or 1 if anything inside the eval died.
#   start          : call start_vip() for the new master;
#                    exit 0, or 10 if anything inside the eval died.
#   status         : print a confirmation line; exit 0.
#   anything else, including a missing --command: print usage; exit 1.
sub main {

  # "use warnings FATAL => 'all'" is in effect for this file, so comparing an
  # undefined $command with eq would abort with an uninitialized-value error
  # before the usage branch is ever reached. Treat a missing --command the
  # same way as an unknown one.
  if ( !defined $command ) {
    &usage();
    exit 1;
  }

  if ( $command eq "stop" || $command eq "stopssh" ) {

    # $orig_master_host, $orig_master_ip, $orig_master_port are passed.
    # If you manage master ip address at global catalog database,
    # invalidate orig_master_ip here.
    my $exit_code = 1;
    eval {
      print "Disabling the VIP on old master: $orig_master_host \n";
      &stop_vip();
      $exit_code = 0;
    };
    if ($@) {
      warn "Got Error: $@\n";
    }

    # Still 1 when the eval died before reaching the assignment above.
    exit $exit_code;
  }
  elsif ( $command eq "start" ) {

    # all arguments are passed.
    # If you manage master ip address at global catalog database,
    # activate new_master_ip here.
    # You can also grant write access (create user, set read_only=0, etc) here.
    my $exit_code = 10;
    eval {
      print "Enabling the VIP - $vip on the new master - $new_master_host \n";
      &start_vip();
      $exit_code = 0;
    };
    if ($@) {
      warn $@;
    }

    # If you want to continue failover, exit 10.
    # (10 is what remains in $exit_code when the eval above died.)
    exit $exit_code;
  }
  elsif ( $command eq "status" ) {
    print "Checking the Status of the script.. ok \n";

    # do nothing
    exit 0;
  }
  else {
    &usage();
    exit 1;
  }
}

 

# Bring the VIP up on the new master by running $ssh_start_vip there over ssh
# as $ssh_user.
#
# Dies when the ssh command cannot be run or finishes with a non-zero status.
# The call site in main() wraps this sub in an eval, so a die here is what
# lets that eval report a failure (exit 10) instead of always exiting 0.
#
# Returns the captured standard output of the ssh command.
sub start_vip() {
  my $output = `ssh $ssh_user\@$new_master_host \" $ssh_start_vip \"`;

  # $? is the wait status of the command the backticks just ran.
  if ( $? != 0 ) {
    die "Failed to enable the VIP $vip on $new_master_host (wait status $?)\n";
  }
  return $output;
}

 

# Take the VIP down on the old master by running $ssh_stop_vip there over ssh
# as $ssh_user.
#
# This stays best effort: a failing ssh command is reported on standard error
# but, exactly as before, does not die and therefore does not change the exit
# status chosen by the caller.
#
# Returns the captured standard output of the ssh command.
sub stop_vip() {
  my $output = `ssh $ssh_user\@$orig_master_host \" $ssh_stop_vip \"`;

  # $? is the wait status of the command the backticks just ran.
  if ( $? != 0 ) {
    warn "Could not disable the VIP on $orig_master_host (wait status $?), continuing\n";
  }
  return $output;
}

# Print the one-line command synopsis to standard output.
sub usage {
  my $synopsis =
"Usage: master_ip_failover --command=start|stop|stopssh|status --orig_master_host=host --orig_master_ip=ip --orig_master_port=port --new_master_host=host --new_master_ip=ip --new_master_port=port\n";
  print $synopsis;
}

 

注意:首先要在master(192.168.127.101)上手动启动VIP:

/sbin/ifconfig eth0:2 192.168.127.202/24

7.开启MHA Manager监控

nohup masterha_manager --conf=/etc/masterha/app1.cnf > /masterha/app1/manager.log  </dev/null 2>&1 &

8.查看启动状态

[root@MHA ~]# masterha_check_status --conf=/etc/masterha/app1.cnf

app1 (pid:27237) is running(0:PING_OK), master:192.168.127.101

 

9. 查看启动日志

[root@MHA ~]# tail -f /masterha/app1/app1.log

 +--192.168.127.103(192.168.127.103:3306)

 

Wed Mar  2 19:08:34 2016 - [info] Checking master_ip_failover_script status:

Wed Mar  2 19:08:34 2016 - [info]   /usr/local/bin/master_ip_failover --command=status --ssh_user=root --orig_master_host=192.168.127.101 --orig_master_ip=192.168.127.101 --orig_master_port=3306

Wed Mar  2 19:08:34 2016 - [info]  OK.

Wed Mar  2 19:08:34 2016 - [warning] shutdown_script is not defined.

Wed Mar  2 19:08:34 2016 - [info] Set master ping interval 1 seconds.

Wed Mar  2 19:08:34 2016 - [info] Set secondary check script: /usr/bin/masterha_secondary_check  -s MHA -s slave02  --user=root --master_host=master --master_ip=192.168.127.101 --master_port=3306 --password=123456

Wed Mar  2 19:08:34 2016 - [info] Starting ping health check on 192.168.127.101(192.168.127.101:3306)..

Wed Mar  2 19:08:34 2016 - [info] Ping(SELECT) succeeded, waiting until MySQL doesn't respond..

 

查看VIP

[root@master ~]# ip addr

1: lo: <LOOPBACK,UP,LOWER_UP> mtu 16436 qdisc noqueue state UNKNOWN

    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00

    inet 127.0.0.1/8 scope host lo

    inet6 ::1/128 scope host

       valid_lft forever preferred_lft forever

2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000

    link/ether 00:0c:29:08:25:3f brd ff:ff:ff:ff:ff:ff

    inet 192.168.127.101/24 brd 192.168.127.255 scope global eth0

    inet 192.168.127.202/24 brd 192.168.127.255 scope global secondary eth0:2

    inet6 fe80::20c:29ff:fe08:253f/64 scope link

       valid_lft forever preferred_lft forever

3: pan0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN

link/ether 0e:ed:39:ba:c1:1b brd ff:ff:ff:ff:ff:ff

 

 

10.测试切换

测试关闭主库

[root@master ~]# /etc/init.d/mysql stop

Shutting down MySQL (Percona Server)......                 [  OK  ]

查看slave02复制状态:

[root@slave02 ~]# mysql

Welcome to the MySQL monitor.  Commands end with ; or \g.

Your MySQL connection id is 27

Server version: 5.6.27-75.0-log Percona Server (GPL), Release 75.0, Revision 8bb53b6

 

Copyright (c) 2009-2015 Percona LLC and/or its affiliates

Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.

 

Oracle is a registered trademark of Oracle Corporation and/or its

affiliates. Other names may be trademarks of their respective

owners.

 

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

 

root@(none) 07:42:08>show slave status\G;

*************************** 1. row ***************************

               Slave_IO_State: Waiting for master to send event

                  Master_Host: 192.168.127.102  #已经自动切换了

                  Master_User: repl

                  Master_Port: 3306

                Connect_Retry: 60

              Master_Log_File: mysql-bin.000003

          Read_Master_Log_Pos: 981

               Relay_Log_File: mysqld-relay-bin.000002

                Relay_Log_Pos: 283

        Relay_Master_Log_File: mysql-bin.000003

             Slave_IO_Running: Yes

            Slave_SQL_Running: Yes

              Replicate_Do_DB:

          Replicate_Ignore_DB:

           Replicate_Do_Table:

       Replicate_Ignore_Table:

      Replicate_Wild_Do_Table:

  Replicate_Wild_Ignore_Table:

                   Last_Errno: 0

                   Last_Error:

                 Skip_Counter: 0

          Exec_Master_Log_Pos: 981

              Relay_Log_Space: 457

              Until_Condition: None

               Until_Log_File:

                Until_Log_Pos: 0

           Master_SSL_Allowed: No

           Master_SSL_CA_File:

           Master_SSL_CA_Path:

              Master_SSL_Cert:

            Master_SSL_Cipher:

               Master_SSL_Key:

        Seconds_Behind_Master: 0

Master_SSL_Verify_Server_Cert: No

                Last_IO_Errno: 0

                Last_IO_Error:

               Last_SQL_Errno: 0

               Last_SQL_Error:

  Replicate_Ignore_Server_Ids:

             Master_Server_Id: 102

                  Master_UUID: 1bb38a96-e066-11e5-84cb-000c2976ee35

             Master_Info_File: /data/mysql3306/master.info

                    SQL_Delay: 0

          SQL_Remaining_Delay: NULL

      Slave_SQL_Running_State: Slave has read all relay log; waiting for the slave I/O thread to update it

           Master_Retry_Count: 86400

                  Master_Bind:

      Last_IO_Error_Timestamp:

     Last_SQL_Error_Timestamp:

               Master_SSL_Crl:

           Master_SSL_Crlpath:

           Retrieved_Gtid_Set:

            Executed_Gtid_Set:

                Auto_Position: 0

1 row in set (0.00 sec)

 

ERROR:

No query specified

查看VIP是否漂移到slave01(192.168.127.102):

[root@slave01 ~]# ip addr

1: lo: <LOOPBACK,UP,LOWER_UP> mtu 16436 qdisc noqueue state UNKNOWN

    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00

    inet 127.0.0.1/8 scope host lo

    inet6 ::1/128 scope host

       valid_lft forever preferred_lft forever

2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000

    link/ether 00:0c:29:76:ee:35 brd ff:ff:ff:ff:ff:ff

    inet 192.168.127.102/24 brd 192.168.127.255 scope global eth0

    inet 192.168.127.202/24 brd 192.168.127.255 scope global secondary eth0:2

    inet6 fe80::20c:29ff:fe76:ee35/64 scope link

       valid_lft forever preferred_lft forever

3: pan0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN

    link/ether 1e:77:57:63:5e:b0 brd ff:ff:ff:ff:ff:ff

 

10. 修复宕机的Master

通常情况下,自动切换后原master可能已经被废弃。待原master主机修复后,如果数据完整,可能想把原master重新作为新主库的slave,这时我们就需要借助自动切换时刻的MHA日志来完成对原master的修复。下面是提取相关日志的命令:

 

[root@MHA ~]# grep -i 'change' /masterha/app1/app1.log

Wed Mar  2 19:09:23 2016 - [info]  All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='192.168.127.102', MASTER_PORT=3306, MASTER_LOG_FILE='mysql-bin.000003', MASTER_LOG_POS=981, MASTER_USER='repl', MASTER_PASSWORD='xxx';

Wed Mar  2 19:09:23 2016 - [info]  Executed CHANGE MASTER.

11. 修复master变成从库

master(192.168.127.101)操作如下:

[root@master ~]# /etc/init.d/mysql start

Starting MySQL (Percona Server)..                          [  OK  ]

[root@master ~]# mysql

Welcome to the MySQL monitor.  Commands end with ; or \g.

Your MySQL connection id is 1

Server version: 5.6.27-75.0-log Percona Server (GPL), Release 75.0, Revision 8bb53b6

 

Copyright (c) 2009-2015 Percona LLC and/or its affiliates

Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.

 

Oracle is a registered trademark of Oracle Corporation and/or its

affiliates. Other names may be trademarks of their respective

owners.

 

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

 

root@(none) 07:26:45>CHANGE MASTER TO MASTER_HOST='192.168.127.102', MASTER_PORT=3306, MASTER_LOG_FILE='mysql-bin.000003', MASTER_LOG_POS=981, MASTER_USER='repl', MASTER_PASSWORD='repl';

Query OK, 0 rows affected, 2 warnings (0.06 sec)

 

root@(none) 07:26:47>start slave;

Query OK, 0 rows affected (0.02 sec)

root@(none) 07:26:49>show slave status\G;

*************************** 1. row ***************************

               Slave_IO_State: Waiting for master to send event

                  Master_Host: 192.168.127.102

                  Master_User: repl

                  Master_Port: 3306

                Connect_Retry: 60

              Master_Log_File: mysql-bin.000003

          Read_Master_Log_Pos: 981

               Relay_Log_File: mysqld-relay-bin.000002

                Relay_Log_Pos: 283

        Relay_Master_Log_File: mysql-bin.000003

             Slave_IO_Running: Yes

            Slave_SQL_Running: Yes

              Replicate_Do_DB:

          Replicate_Ignore_DB:

           Replicate_Do_Table:

       Replicate_Ignore_Table:

      Replicate_Wild_Do_Table:

  Replicate_Wild_Ignore_Table:

                   Last_Errno: 0

                   Last_Error:

                 Skip_Counter: 0

          Exec_Master_Log_Pos: 981

              Relay_Log_Space: 457

              Until_Condition: None

               Until_Log_File:

                Until_Log_Pos: 0

           Master_SSL_Allowed: No

           Master_SSL_CA_File:

           Master_SSL_CA_Path:

              Master_SSL_Cert:

            Master_SSL_Cipher:

               Master_SSL_Key:

        Seconds_Behind_Master: 0

Master_SSL_Verify_Server_Cert: No

                Last_IO_Errno: 0

                Last_IO_Error:

               Last_SQL_Errno: 0

               Last_SQL_Error:

  Replicate_Ignore_Server_Ids:

             Master_Server_Id: 102

                  Master_UUID: 1bb38a96-e066-11e5-84cb-000c2976ee35

             Master_Info_File: /data/mysql3306/master.info

                    SQL_Delay: 0

          SQL_Remaining_Delay: NULL

      Slave_SQL_Running_State: Slave has read all relay log; waiting for the slave I/O thread to update it

           Master_Retry_Count: 86400

                  Master_Bind:

      Last_IO_Error_Timestamp:

     Last_SQL_Error_Timestamp:

               Master_SSL_Crl:

           Master_SSL_Crlpath:

           Retrieved_Gtid_Set:

            Executed_Gtid_Set:

                Auto_Position: 0

1 row in set (0.00 sec)

 

ERROR:

No query specified

12. 开启新的MHA Manager监控

[root@MHA ~]# cd /etc/masterha/

[root@MHA masterha]# cp app1.cnf app2.cnf

修改配置如下,注意:红色是修改地方

[root@MHA masterha]# vi  app2.cnf

[server default]

manager_workdir=/masterha/app1

manager_log=/masterha/app1/app1.log

master_ip_failover_script=/usr/local/bin/master_ip_failover

master_ip_online_change_script=/usr/local/bin/master_ip_online_change

 

user=root

password=123456

ssh_user=root

repl_user=repl

repl_password=repl

ping_interval=1

remote_workdir=/tmp

report_script=/usr/local/bin/send_report

secondary_check_script=/usr/bin/masterha_secondary_check  -s master  -s slave01  --user=root --master_host=slave01 --master_ip=192.168.127.102 --master_port=3306 --password=123456

shutdown_script=""

report_script=""

 

 

[server1]

hostname=192.168.127.102

master_binlog_dir=/data/mysql3306

candidate_master=1

[server2]

hostname=192.168.127.101

master_binlog_dir=/data/mysql3306

candidate_master=1

check_repl_delay=0

 

[server3]

hostname=192.168.127.103

master_binlog_dir=/data/mysql3306

no_master=1

 

查看

[root@MHA masterha]# masterha_check_status --conf=/etc/masterha/app1.cnf

app1 is stopped(2:NOT_RUNNING).

启动新的MHA监控

[root@MHA masterha]# nohup masterha_manager --conf=/etc/masterha/app2.cnf > /masterha/app1/manager.log  </dev/null 2>&1 &

[1] 2089

查看启动状态

[root@MHA masterha]# masterha_check_status --conf=/etc/masterha/app2.cnf

app2 (pid:2089) is running(0:PING_OK), master:192.168.127.102

 

以上测试成功,为了保证稳定,反复测试一下。

 

13.MHA+半同步复制

为了保证数据一致性采用半同步复制

(1)       Master(192.168.127.101),slave01(192.168.127.102)操作如下:

安装相关插件并启动半同步复制

INSTALL PLUGIN rpl_semi_sync_master SONAME 'semisync_master.so';

SET GLOBAL rpl_semi_sync_master_enabled=1;

SET GLOBAL rpl_semi_sync_master_timeout=10000;

主从切换后,这两台机器也可能变为从库,所以也要执行如下步骤

INSTALL PLUGIN rpl_semi_sync_slave SONAME 'semisync_slave.so';

SET GLOBAL rpl_semi_sync_slave_enabled=1;

 

在配置文件my.cnf增加以下参数

#############半同步###########

rpl_semi_sync_master_enabled=1 

rpl_semi_sync_master_timeout=10000

rpl_semi_sync_master_trace_level=32 

rpl_semi_sync_master_wait_no_slave=on

 

rpl_semi_sync_slave_enabled=1

#################################

 

(2)       所有的从库都操作如下:

Slave02(192.168.127.103)的操作

安装相关插件并启动半同步复制

INSTALL PLUGIN rpl_semi_sync_slave SONAME 'semisync_slave.so';

SET GLOBAL rpl_semi_sync_slave_enabled=1;

在配置文件my.cnf增加以下参数

#############半同步###########

rpl_semi_sync_slave_enabled=1

#################################

 

以上配置成功,不需要重启

 

查看主库的半同步

root@(none) 11:36:36>show variables like 'rpl%';

+------------------------------------+----------+

| Variable_name                      | Value    |

+------------------------------------+----------+

| rpl_semi_sync_master_enabled       | ON       |

| rpl_semi_sync_master_timeout       | 10000    |

| rpl_semi_sync_master_trace_level   | 32       |

| rpl_semi_sync_master_wait_no_slave | ON       |

| rpl_semi_sync_slave_enabled        | ON       |

| rpl_semi_sync_slave_trace_level    | 32       |

| rpl_stop_slave_timeout             | 31536000 |

+------------------------------------+----------+

7 rows in set (0.01 sec)

 

查看从库的半同步

 

root@(none) 11:36:36>show variables like 'rpl%';

+---------------------------------+----------+

| Variable_name                   | Value    |

+---------------------------------+----------+

| rpl_semi_sync_slave_enabled     | ON       |

| rpl_semi_sync_slave_trace_level | 32       |

| rpl_stop_slave_timeout          | 31536000 |

+---------------------------------+----------+

3 rows in set (0.01 sec)

 

温馨提示:如有转载或引用以上内容之必要,敬请将本文链接作为出处标注,谢谢合作!

已有 0/18449 人参与

发表评论:

欢迎使用手机扫描访问本站,还可以关注微信哦~