heartbeat消息类型
heartbeat消息类型:
心跳消息:约150字节的数据包,可采用单播、广播或多播的方式发送,用于控制心跳频率以及出现故障后要等待多久才进行故障切换。
集群转换消息:ip-request和ip-request-resp。当主服务器恢复在线状态时,通过ip-request消息要求备服务器释放主服务器失败时备服务器取得的资源,然后备服务器关闭服务,释放主服务器失败时取得的资源及服务。备服务器释放主服务器失败时取得的资源及服务后,就会通过ip-request-resp消息通知主服务器它不再拥有该资源及服务;主服务器收到来自备节点的ip-request-resp消息通知后,启动失败时释放的资源及服务,并开始提供正常的访问服务。
重传请求:rexmit-request,用于控制重传心跳请求。
以上的心跳控制消息使用UDP协议发送到/etc/ha.d/ha.cf文件指定的任意端口或指定的多播地址,默认使用694端口。
心跳消息的查看(借鉴http://blog.chinaunix.net/uid-7921481-id-1617030.html):
heartbeat中,可以使用命令cl_status来查询集群心跳的相关信息。显示节点server-1所使用的心跳:
[root@server-1 bin]# ./cl_status listhblinks server-1
eth1 eth0
显示节点server-1的eth0心跳状态:
[root@server-1 bin]# ./cl_status hblinkstatus server-1 eth0 up 为了在mgmt下可以检查心跳状态,在mgmt/daemon/mgmt_hb.c中添加如下函数: char* on_status_of_hblinks(char* argv[], int argc) { const char * intf; const char * if_status; const char* name = NULL; char* ret = cl_strdup(MSG_OK); if (hb->llc_ops->init_nodewalk(hb) != HA_OK) { mgmt_log(LOG_ERR, "Cannot start node walk"); mgmt_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); cl_free(ret); return cl_strdup(MSG_FAIL); } while((name = hb->llc_ops->nextnode(hb))!= NULL) { if (strcmp(hb->llc_ops->node_type(hb,name), "ping") == 0) continue; if (strcmp(get_localnodeinfo(), name) == 0) continue; if (hb->llc_ops->init_ifwalk(hb, name) != HA_OK) { cl_log(LOG_ERR, "Cannot start heartbeat link interface walk."); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); return cl_strdup(MSG_FAIL"nCannot start heartbeat link interface walk"); } while ((intf = hb->llc_ops->nextif(hb))) { if_status = hb->llc_ops->if_status(hb, name, intf); if (if_status == NULL) { /* Should be error ? */ cl_log(LOG_ERR, "Cannot get heartbeat link status"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); return cl_strdup(MSG_FAIL"nCannot get heartbeat link status"); } if ( STRNCMP_CONST(if_status, "dead") == 0 ) { return cl_strdup(MSG_OK"nhas_dead"); } } if (hb->llc_ops->end_ifwalk(hb) != HA_OK) { cl_log(LOG_ERR, "Cannot end heartbeat link interface walk"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); return cl_strdup(MSG_FAIL"nCannot end heartbeat link interface walk"); } } if (hb->llc_ops->end_nodewalk(hb) != HA_OK) { mgmt_log(LOG_ERR, "Cannot end node walk"); mgmt_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); cl_free(ret); return cl_strdup(MSG_FAIL); } return cl_strdup(MSG_OK"ngood"); }
之后即可在mgmt中使用status_hblinks命令查询心跳的状态,如:
[root@server-1 heartbeat-gui]# ./mgmtcmd.py status_hblinks
---------------------------
ok
has_dead