Commit a12e1152 authored by jingbo.wang

node 非正常下线 告警 ok

parent 1db1ec83
......@@ -11,18 +11,21 @@ import (
func Load() {
config,err := ioutil.ReadFile(conf.GlobalConfig.StrategyConfPath)
if err != nil {
logger.Error.Fatal("未找到配置文件")
logger.Error.Print("!!!轮训监控,未找到配置文件!!!")
return
}
strategies := make([]Strategy, 0)
err = json.Unmarshal(config, &strategies)
if err != nil {
logger.Error.Fatal("策略文件格式错误:", err)
logger.Error.Printf("!!!策略文件格式错误:%s!!!\n", err)
return
}
if !CheckArray(strategies) {
logger.Error.Fatal("策略文件未通过校验")
logger.Error.Print("!!!策略文件未通过校验!!!")
return
}
sql := BuildSql(strategies)
......
......@@ -22,7 +22,7 @@ var HttpTimeOut = time.Second * 10
// init registers hard-coded hook URLs in HandlerMap under string keys
// (presumably service names looked up by handler — confirm against caller).
// NOTE(review): the "aaaa" entry looks like a leftover test value and may be a
// removed diff line in this scraped view — confirm whether it is still wanted.
func init() {
HandlerMap.Store("aaaa", "http://www.baidasdfasdfasdfasdfu.com/")
// HandlerMap.Store("aaaa", "http://172.20.6.33:8989/service-down")
// HandlerMap.Store("aaaa", "http://172.20.6.33:8989/service-down")
// TODO: make the hook URLs configurable instead of hard-coding them.
HandlerMap.Store("heimdallr", "http://172.20.6.33:8989/service-down")
}
......@@ -55,7 +55,7 @@ func handler(serviceName string) {
}
}
} else {
logger.Info.Print( " handler service: ", serviceName, " ", "not found handler hook api")
logger.Info.Print(" handler service: ", serviceName, " ", "not found handler hook api")
}
}
......@@ -65,32 +65,71 @@ func (w watch) DeleteService(serviceName string) {
//pass
}
// serviceStatus reports whether at least one node in service.NodeMap has a
// Status equal to Passing. A service with no nodes yields false.
func serviceStatus(service *registry.Service) bool {
	healthy := false
	for _, n := range service.NodeMap {
		if n.Status != Passing {
			continue
		}
		// One passing node is enough; no need to look at the rest.
		healthy = true
		break
	}
	return healthy
}
// serviceStr renders a service as "<name> <id>:<status> <id>:<status> ...",
// with a trailing space after the name and after every node entry. Nodes are
// emitted in map iteration order, so their order is not stable between calls.
func serviceStr(service *registry.Service) string {
	buf := []byte(service.Name)
	buf = append(buf, ' ')
	for _, n := range service.NodeMap {
		buf = append(buf, n.Id...)
		buf = append(buf, ':')
		buf = append(buf, n.Status...)
		buf = append(buf, ' ')
	}
	return string(buf)
}
// UpdateNodes receives a service snapshot, compares it with the entry stored
// in servicesStatus under service.Name, logs and sends DingDing alerts when a
// node or the whole service goes from passing to critical, and then stores a
// copy of the snapshot. servicesStatusLock is held for the whole call.
// NOTE(review): this span comes from a scraped diff and interleaves lines of
// the previous revision (sync.Map, serviceOK/nodeStr) with lines of the
// current one (plain map, serviceStatus/serviceStr); it does not read as one
// coherent body and the braces do not balance as shown.
func (w watch) UpdateNodes(service *registry.Service) {
// Determine the service status.
serviceOK := false
nodeStr := ""
servicesStatusLock.Lock()
defer servicesStatusLock.Unlock()
// Alert when a single node goes down.
if oldService, ok := servicesStatus[service.Name]; ok {
for _, node := range service.NodeMap {
nodeStr += node.Id + ":" + node.Status + " "
if node.Status == "passing" {
serviceOK = true
if oldNode, ok := oldService.NodeMap[node.Id]; ok {
// Only the passing -> critical transition of a known node triggers an alert.
if oldNode.Status == Passing && node.Status == Critical {
logger.Warning.Print(service.Name, " ", node.Id, "---!!!node critical!!!---")
// The result of the DingDing send is deliberately discarded.
_ = dingding.SenderDingDing(service.Name+" "+node.Id+" "+"test:---!!!node critical!!!---", dingding.DefaultDingURL)
}
}
}
}
// Decide whether to alert.
if v, ok := servicesStatus.Load(service.Name); ok && v.(string) == Passing && !serviceOK {
logger.Warning.Print(service.Name, " ", nodeStr, "---!!!service critical!!!---")
_ = dingding.SenderDingDing(service.Name+" "+nodeStr+" "+"---!!!service critical!!!---", dingding.DefaultDingURL)
// Alert when the whole service goes down.
// If the service is already known, its previous state was passing and its
// current state is critical, raise an alert; otherwise just log the state.
serviceString := serviceStr(service)
if oldService, ok := servicesStatus[service.Name]; ok && serviceStatus(oldService) && !serviceStatus(service) {
logger.Warning.Print(serviceString, "---!!!service critical!!!---")
_ = dingding.SenderDingDing(serviceString+"---!!!service critical!!!---", dingding.DefaultDingURL)
// Invoke the hook registered for this service, if any.
handler(service.Name)
} else {
logger.Info.Print(service.Name, " ", nodeStr)
logger.Info.Print(serviceString)
}
// Update the stored service status.
if serviceOK {
servicesStatus.Store(service.Name, Passing)
} else {
servicesStatus.Store(service.Name, Critical)
// Deep-copy the object so the stored snapshot does not share nodes with the caller's value.
newService := registry.NewService(service.Name)
for kk, vv := range service.NodeMap {
newNode := registry.Node{
ServiceName:vv.ServiceName,
Id:vv.Id,
Port:vv.Port,
Address:vv.Address,
Status:vv.Status,
}
// NOTE(review): newNode.Meta is not set in the literal above; if Meta is a
// map (or slice) this write panics for any non-empty vv.Meta — confirm the
// field type and allocate it before copying.
for x, y := range vv.Meta {
newNode.Meta[x] = y
}
newService.NodeMap[kk] = &newNode
}
servicesStatus[service.Name] = newService
}
func (w watch) AddNode(node *registry.Node) {
......@@ -102,26 +141,12 @@ func (w watch) DelNode(node *registry.Node) {
}
// Package-level state shared by the service watcher functions.
var (
// NOTE(review): servicesStatus appears twice because this scraped diff shows
// both the previous declaration (sync.Map) and the current one (plain map).
servicesStatus = new(sync.Map)
// servicesStatus maps a service name to a *registry.Service.
servicesStatus = make(map[string]*registry.Service)
// servicesStatusLock is the mutex UpdateNodes locks before touching servicesStatus.
servicesStatusLock = new(sync.Mutex)
)
// InitServiceStatus seeds servicesStatus from registry.GetServiceMap().
// NOTE(review): this span interleaves the previous revision (per-service
// Passing/Critical stored in a sync.Map) with the current one (direct
// assignment of the registry map). The direct assignment does not take
// servicesStatusLock and may alias the registry's own map — confirm whether
// GetServiceMap returns a copy.
func InitServiceStatus() {
services := registry.GetServiceMap()
for _, service := range services {
// A service counts as OK as soon as one of its nodes is Passing.
serviceOK := false
for _, node := range service.NodeMap {
if node.Status == Passing {
serviceOK = true
break
}
}
if serviceOK {
servicesStatus.Store(service.Name, Passing)
} else {
servicesStatus.Store(service.Name, Critical)
}
}
servicesStatus = registry.GetServiceMap()
}
func NodeCheck() {
......@@ -129,7 +154,7 @@ func NodeCheck() {
if e := recover(); e != nil {
logger.Info.Print("node check panic: ", e)
_ = dingding.SenderDingDing("node check panic!", dingding.DefaultDingURL)
time.Sleep(time.Second*1)
time.Sleep(time.Second * 1)
NodeCheck()
}
}()
......
......@@ -264,9 +264,18 @@ func (cr *consulRegistry) GetService(serviceName string) (*Service, error) {
//深拷贝
if v, ok := cr.serviceMap[serviceName]; ok {
rtn := NewService(serviceName)
rtn.TagMap = v.TagMap
for kk, vv := range v.NodeMap {
rtn.NodeMap[kk] = vv
newNode := Node{
ServiceName:vv.ServiceName,
Id:vv.Id,
Port:vv.Port,
Address:vv.Address,
Status:vv.Status,
}
for x, y := range vv.Meta {
newNode.Meta[x] = y
}
rtn.NodeMap[kk] = &newNode
}
return rtn, nil
} else {
......@@ -283,7 +292,22 @@ func (cr *consulRegistry) SetObserver(name string, observer Observer) error {
return errors.New("observer is exists")
} else {
for _, service := range cr.serviceMap {
observer.UpdateNodes(service)
//深拷贝对象
newService := NewService(service.Name)
for kk, vv := range service.NodeMap {
newNode := Node{
ServiceName:vv.ServiceName,
Id:vv.Id,
Port:vv.Port,
Address:vv.Address,
Status:vv.Status,
}
for x, y := range vv.Meta {
newNode.Meta[x] = y
}
newService.NodeMap[kk] = &newNode
}
observer.UpdateNodes(newService)
}
cr.observerMap.Store(name, observer)
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment