Posted on

Heartbeat/corosync+pacemaker+ldirectord 实现Nginx负载均衡

2010-02-01 17:34

Heartbeat/corosync+pacemaker+ldirectord 实现Nginx负载均衡

Heartbeat/corosync+pacemaker+ldirectord 实现Nginx负载均衡

欢迎访问yoyotown.com一起讨论IT技术。

系统:CentOS 5.4
IP分配:
查看源代码
打印帮助
HA1     eth0:192.168.0.66   eth1:192.168.10.1
HA2     eth0:192.168.0.69   eth1:192.168.10.2
VIP     192.168.0.120

1. 安装pacemaker和heartbeat
[root@HA1 ~]# wget -O /etc/yum.repos.d/pacemaker.repo http://clusterlabs.org/rpm/epel-5/clusterlabs.repo
[root@HA1 ~]# wget <libesmtp-1.0.4-6.el5.kb.i386.rpm 的下载地址>    # 原文此处的下载URL已丢失,请自行查找该rpm包
[root@HA1 ~]# rpm -ivh libesmtp-1.0.4-6.el5.kb.i386.rpm
[root@HA1 ~]# yum install -y pacemaker heartbeat

2. 安装ldirectord
[root@HA1 ~]# yum install -y ldirectord

3. 配置
3.1 配置Heartbeat
[root@HA1 ~]# cp /usr/share/doc/heartbeat-3.0.1/{ha.cf,authkeys} /etc/ha.d/

[root@HA1 ~]# cat /etc/ha.d/authkeys
查看源代码
打印帮助
1        auth 1
2        1 crc

[root@HA1 ~]# grep -v '^#' /etc/ha.d/ha.cf
查看源代码
打印帮助
01        logfile /var/log/ha-log
02        logfacility local0
03        keepalive 2
04        deadtime 30
05        warntime 10
06        initdead 120
07        udpport 695
08        ucast eth1 192.168.10.2     # 注意此处HA2改为:ucast eth1 192.168.10.1
09        auto_failback on
10        watchdog /dev/watchdog
11        node    HA1
12        node    HA2
13        ping 192.168.0.1
14        respawn hacluster /usr/lib/heartbeat/ipfail
15        apiauth ipfail gid=haclient uid=hacluster
16        crm on

3.2 corosync替换heartbeat(可选)
corosync是基于OpenAIS构建的集群引擎,可替代heartbeat进行心跳检测。
The Corosync Cluster Engine is an open source project Licensed under the BSD License derived from the OpenAIS project. OpenAIS uses a UDP multicast based communication protocol to periodically check for node availability.

[root@HA1 ~]# yum install -y corosync
[root@HA1 ~]# corosync-keygen
Corosync Cluster Engine Authentication key generator.
Gathering 1024 bits for key from /dev/random.
Press keys on your keyboard to generate entropy.
Writing corosync key to /etc/corosync/authkey.

[root@HA1 ~]# scp /etc/corosync/authkey HA2:/etc/corosync/
[root@HA1 ~]# cp /etc/corosync/corosync.conf.example /etc/corosync/corosync.conf
[root@HA1 ~]# vi !$
查看源代码
打印帮助
01        # Please read the corosync.conf.5 manual page
02        compatibility: whitetank
03
04        totem {
05        version: 2
06        secauth: off
07        threads: 0
08        interface {
09        ringnumber: 0
10        bindnetaddr: 192.168.10.0
11        mcastaddr: 226.94.1.1
12        mcastport: 5405
13        }
14        }
15
16        logging {
17        fileline: off
18        to_stderr: yes
19        to_logfile: yes
20        to_syslog: yes
21        logfile: /var/log/corosync.log
22        debug: off
23        timestamp: on
24        logger_subsys {
25        subsys: AMF
26        debug: off
27        }
28        }
29
30        amf {
31        mode: disabled
32        }
33
34        service {
35                # Load the Pacemaker Cluster Resource Manager
36                ver:       0
37                name:      pacemaker
38                use_mgmtd: yes
39        }

[root@HA1 ~]# scp /etc/corosync/corosync.conf HA2:/etc/corosync/corosync.conf
[root@HA1 ~]# service corosync start
Starting Corosync Cluster Engine (corosync):               [  OK  ]
[root@HA1 ~]# chkconfig --level 2345 corosync on
[root@HA1 ~]# chkconfig --level 2345 heartbeat off

HA2上执行:
[root@HA2 ~]# chown root:root /etc/corosync/authkey
[root@HA2 ~]# chmod 400 /etc/corosync/authkey
[root@HA2 ~]# service corosync start
Starting Corosync Cluster Engine (corosync):               [  OK  ]
[root@HA2 ~]# chkconfig --level 2345 corosync on
[root@HA2 ~]# chkconfig --level 2345 heartbeat off

3.3 安装配置ldirectord
[root@HA1 ~]# cat /etc/ha.d/ldirectord.cf
查看源代码
打印帮助
01        checktimeout=5
02        checkinterval=7
03        autoreload=yes
04        logfile="/var/log/ldirectord.log"
05        quiescent=yes
06        emailalert=shidl@baihe.com
07        # A server with a page at the main root of the site that displays “Nginx”
08        virtual=192.168.0.120:80
09        real=192.168.0.66:80 gate
10        real=192.168.0.69:80 gate
11        service=http
12        request="/lb.html"    # 在根目录下编写lb.html,内容为live
13        receive="live"
14        scheduler=wlc
15        protocol=tcp
16        checktype=negotiate

3.4 配置hosts
[root@HA1 ~]# cat /etc/hosts
查看源代码
打印帮助
1        # Do not remove the following line, or various programs
2        # that require network functionality will fail.
3        127.0.0.1       vpc localhost.localdomain localhost
4        ::1     localhost6.localdomain6 localhost6
5        192.168.10.1    HA1
6        192.168.10.2    HA2

3.5 配置lo:0设备
查看源代码
打印帮助
1        [root@HA1 ~]# cat >>/etc/sysconfig/network-scripts/ifcfg-lo:0<<EOF
2        DEVICE=lo:0
3        IPADDR=192.168.0.120
4        NETMASK="255.255.255.255"
5        ONBOOT=yes
6        NAME=loopback
7
8        EOF

3.6 启用转发,禁用arp
[root@HA1 ~]# vi /etc/sysctl.conf
修改net.ipv4.ip_forward = 0为net.ipv4.ip_forward = 1
添加下面行:
查看源代码
打印帮助
1        net.ipv4.conf.all.arp_ignore = 1
2        net.ipv4.conf.eth0.arp_ignore = 1
3        net.ipv4.conf.all.arp_announce = 2
4        net.ipv4.conf.eth0.arp_announce = 2

[root@HA1 ~]# sysctl -p

将配置拷贝到HA2
[root@HA1 ~]# scp /etc/ha.d/{ha.cf,authkeys,ldirectord.cf} HA2:/etc/ha.d/
[root@HA1 ~]# scp /etc/{hosts,sysctl.conf} HA2:/etc/
[root@HA1 ~]# scp /etc/sysconfig/network-scripts/ifcfg-lo:0 HA2:/etc/sysconfig/network-scripts/

HA2上修改/etc/ha.d/ha.cf
ucast eth1 192.168.10.2 改为:ucast eth1 192.168.10.1
并使sysctl.conf配置生效:
[root@HA2 ~]# sysctl -p

3.7 在HA1和HA2上安装并配置好nginx
编写nginx lsb资源代理脚本(注意nginx安装路径):
[root@HA1 ~]# cat /etc/init.d/nginxd
查看源代码
打印帮助
01        #!/bin/sh
02
03        # source function library
04        . /etc/rc.d/init.d/functions
05
06        # Source networking configuration.
07        . /etc/sysconfig/network
08
09        # Check that networking is up.
10        [ ${NETWORKING} = "no" ] && exit 0
11
12        RETVAL=0
13        prog="nginx"
14
15        nginxDir=/usr/local/nginx
16        nginxd=$nginxDir/sbin/nginx
17        nginxConf=$nginxDir/conf/nginx.conf
18        nginxPid=$nginxDir/nginx.pid
19
20        nginx_check()
21        {
22            if [[ -e $nginxPid ]]; then
23                ps aux |grep -v grep |grep -q nginx
24                if (( $? == 0 )); then
25                    echo "$prog already running..."
26                    exit 1
27                else
28                    rm -rf $nginxPid &> /dev/null
29                fi
30            fi
31        }
32
33        start()
34        {
35            nginx_check
36            if (( $? != 0 )); then
37                true
38            else
39                echo -n $"Starting $prog:"
40                daemon $nginxd -c $nginxConf
41                RETVAL=$?
42                echo
43                [ $RETVAL = 0 ] && touch /var/lock/subsys/nginx
44                return $RETVAL
45            fi
46        }
47
48        stop()
49        {
50            echo -n $"Stopping $prog:"
51            killproc $nginxd
52            RETVAL=$?
53            echo
54            [ $RETVAL = 0 ] && rm -f /var/lock/subsys/nginx $nginxPid
55        }
56
57        reload()
58        {
59            echo -n $"Reloading $prog:"
60            killproc $nginxd -HUP
61            RETVAL=$?
62            echo
63        }
64
65        case "$1" in
66                start)
67                        start
68                        ;;
69                stop)
70                        stop
71                        ;;
72                restart)
73                        stop
74                        start
75                        ;;
76                reload)
77                        reload
78                        ;;
79                status)
80                        status $prog
81                        RETVAL=$?
82                        ;;
83                *)
84                        echo $"Usage: $0 {start|stop|restart|reload|status}"
85                        RETVAL=1
86        esac
87        exit $RETVAL

[root@HA1 ~]# chmod +x  /etc/init.d/nginxd
[root@HA1 ~]# scp  /etc/init.d/nginxd HA2:/etc/init.d/nginxd

[root@HA1 ~]# service network restart
[root@HA1 ~]# service heartbeat start

[root@HA2 ~]# service network restart
[root@HA2 ~]# service heartbeat start

4. 配置集群资源:

Heartbeat和其他应用提供的ocf代理脚本或许会有错误,我们可以用下面方法排错:
要检查 OCF 脚本,请首先设置所需的环境变量。例如,当测试IPaddr OCF 脚本时,您必须通过设置一个变量名称前缀为 OCF_RESKEY_的环境变量来设置变量 ip 的值。对于此示例,可运行以下命令:
查看源代码
打印帮助
1        export OCF_RESKEY_ip=192.168.0.120    # 示例值,请替换为要测试的IP地址
2        /usr/lib/ocf/resource.d/heartbeat/IPaddr validate-all
3        /usr/lib/ocf/resource.d/heartbeat/IPaddr start
4        /usr/lib/ocf/resource.d/heartbeat/IPaddr stop

如果此操作不成功,很可能是您缺少某个必需变量或者只是输错了参数。

排错ldirectord ocf代理脚本:
export OCF_ROOT=/usr/lib/ocf
根据自己的环境设置修改下面两行:
[root@HA1 ~]# vi /usr/lib/ocf/resource.d/heartbeat/ldirectord
查看源代码
打印帮助
1        LDIRCONF=${OCF_RESKEY_configfile:-/etc/ha.d/ldirectord.cf}
2        LDIRECTORD=${OCF_RESKEY_ldirectord:-/usr/sbin/ldirectord}

[root@HA1 ~]# /usr/lib/ocf/resource.d/heartbeat/ldirectord monitor
[root@HA1 ~]# echo $?
7     # ldirectord未运行返回7,运行正常返回0

[root@HA1 ~]# crm
crm(live)# configure
crm(live)configure# node HA1
crm(live)configure# node HA2
crm(live)configure# primitive ldirectord ocf:heartbeat:ldirectord \
> params configfile="/etc/ha.d/ldirectord.cf" \
> op monitor interval="30s" timeout="20s" \
> meta migration-threshold="10" target-role="Started"
crm(live)configure# primitive vip ocf:heartbeat:IPaddr2 \
> params lvs_support="true" ip="192.168.0.120" cidr_netmask="24" broadcast="192.168.0.255" \
> op monitor interval="1m" timeout="20s" \
> meta migration-threshold="10"
crm(live)configure# primitive nginxd lsb:nginxd \
> op monitor interval="30s" timeout="20s" \
> meta migration-threshold="10" target-role="Started"
crm(live)configure# group load-balancing vip ldirectord
crm(live)configure# clone cl-nginxd nginxd
crm(live)configure# location prefer-ha1 load-balancing \
> rule $id="prefer-ha1-rule" 100: #uname eq HA1
crm(live)configure# property stonith-enabled="false" \
> no-quorum-policy="ignore" \
> start-failure-is-fatal="false" \
> expected-quorum-votes="2"
crm(live)configure# verify
crm(live)configure# commit
crm(live)configure# end
crm(live)# status

============
Last updated: Thu Nov 12 01:00:13 2009
Stack: Heartbeat
Current DC: HA2 – partition with quorum
Version: 1.0.6-f709c638237cdff7556cb6ab615f32826c0f8c06
2 Nodes configured, 2 expected votes
2 Resources configured.
============

Online: [ HA2 HA1 ]

Clone Set: cl-nginxd
Started: [ HA2 HA1 ]
Resource Group: load-balancing
vip    (ocf::heartbeat:IPaddr2):    Started HA1
ldirectord    (ocf::heartbeat:ldirectord):    Started HA1

crm(live)# quit
bye

5. 验证
[root@HA1 ~]# ipvsadm -l
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port           Forward Weight ActiveConn InActConn
TCP  192.168.0.120:http wlc
-> 192.168.0.69:http            Route   1      0          0
-> 192.168.0.66:http            Local   1      0          0

用浏览器访问网站看是否正常。

禁用HA1的eth1网卡,在HA2上看故障转移情况。
[root@HA2 ~]# crm
crm(live)# status

============
Last updated: Thu Nov 12 18:40:54 2009
Stack: Heartbeat
Current DC: HA2 – partition WITHOUT quorum
Version: 1.0.6-f709c638237cdff7556cb6ab615f32826c0f8c06
2 Nodes configured, 2 expected votes
2 Resources configured.
============

Online: [ HA2 ]
OFFLINE: [ HA1 ]

Clone Set: cl-nginxd
Started: [ HA2 ]
Stopped: [ nginxd:0 ]
Resource Group: load-balancing
vip    (ocf::heartbeat:IPaddr2):    Started HA2
ldirectord    (ocf::heartbeat:ldirectord):    Started HA2

启用HA1的eth1网卡,在HA1上看故障恢复(failback)情况。

[root@HA1 ~]# crm status

============
Last updated: Thu Nov 12 18:42:55 2009
Stack: Heartbeat
Current DC: HA1 – partition with quorum
Version: 1.0.6-f709c638237cdff7556cb6ab615f32826c0f8c06
2 Nodes configured, 2 expected votes
2 Resources configured.
============

Online: [ HA2 HA1 ]

Clone Set: cl-nginxd
Started: [ HA1 HA2 ]
Resource Group: load-balancing
vip    (ocf::heartbeat:IPaddr2):    Started HA1
ldirectord    (ocf::heartbeat:ldirectord):    Started HA1

6. 参考:

Load Balanced MySQL Replicated Cluster
Debian Lenny HowTo

相关文章:

* 2009/12/11 — MySQL-Nginx-Pacemaker-corosync(openais)-drbd active/passive cluster
* 2009/12/09 — Heartbeat实现Nginx高可用性(style 2.x)
* 2009/12/08 — Heartbeat实现Nginx高可用性(style 1.x)
* 2009/12/10 — CRM命令行工具配置集群资源
* 2009/12/08 — HA体系架构及内部处理流程

Leave a Reply

Your email address will not be published.