Commit a12e1152 authored by jingbo.wang

node 非正常下线 告警 ok

parent 1db1ec83
...@@ -11,18 +11,21 @@ import ( ...@@ -11,18 +11,21 @@ import (
func Load() { func Load() {
config,err := ioutil.ReadFile(conf.GlobalConfig.StrategyConfPath) config,err := ioutil.ReadFile(conf.GlobalConfig.StrategyConfPath)
if err != nil { if err != nil {
logger.Error.Fatal("未找到配置文件") logger.Error.Print("!!!轮训监控,未找到配置文件!!!")
return
} }
strategies := make([]Strategy, 0) strategies := make([]Strategy, 0)
err = json.Unmarshal(config, &strategies) err = json.Unmarshal(config, &strategies)
if err != nil { if err != nil {
logger.Error.Fatal("策略文件格式错误:", err) logger.Error.Printf("!!!策略文件格式错误:%s!!!\n", err)
return
} }
if !CheckArray(strategies) { if !CheckArray(strategies) {
logger.Error.Fatal("策略文件未通过校验") logger.Error.Print("!!!策略文件未通过校验!!!")
return
} }
sql := BuildSql(strategies) sql := BuildSql(strategies)
......
...@@ -22,7 +22,7 @@ var HttpTimeOut = time.Second * 10 ...@@ -22,7 +22,7 @@ var HttpTimeOut = time.Second * 10
func init() { func init() {
HandlerMap.Store("aaaa", "http://www.baidasdfasdfasdfasdfu.com/") HandlerMap.Store("aaaa", "http://www.baidasdfasdfasdfasdfu.com/")
// HandlerMap.Store("aaaa", "http://172.20.6.33:8989/service-down") // HandlerMap.Store("aaaa", "http://172.20.6.33:8989/service-down")
//TODO 灵活配置 //TODO 灵活配置
HandlerMap.Store("heimdallr", "http://172.20.6.33:8989/service-down") HandlerMap.Store("heimdallr", "http://172.20.6.33:8989/service-down")
} }
...@@ -55,7 +55,7 @@ func handler(serviceName string) { ...@@ -55,7 +55,7 @@ func handler(serviceName string) {
} }
} }
} else { } else {
logger.Info.Print( " handler service: ", serviceName, " ", "not found handler hook api") logger.Info.Print(" handler service: ", serviceName, " ", "not found handler hook api")
} }
} }
...@@ -65,32 +65,71 @@ func (w watch) DeleteService(serviceName string) { ...@@ -65,32 +65,71 @@ func (w watch) DeleteService(serviceName string) {
//pass //pass
} }
func (w watch) UpdateNodes(service *registry.Service) { func serviceStatus(service *registry.Service) bool {
//获取服务状态
serviceOK := false
nodeStr := ""
for _, node := range service.NodeMap { for _, node := range service.NodeMap {
nodeStr += node.Id + ":" + node.Status + " " if node.Status == Passing {
if node.Status == "passing" { return true
serviceOK = true
} }
} }
//判断是否告警 return false
if v, ok := servicesStatus.Load(service.Name); ok && v.(string) == Passing && !serviceOK { }
logger.Warning.Print(service.Name, " ", nodeStr, "---!!!service critical!!!---")
_ = dingding.SenderDingDing(service.Name+" "+nodeStr+" "+"---!!!service critical!!!---", dingding.DefaultDingURL) func serviceStr(service *registry.Service) string {
rtn := service.Name + " "
for _, node := range service.NodeMap {
rtn += node.Id + ":" + node.Status + " "
}
return rtn
}
func (w watch) UpdateNodes(service *registry.Service) {
servicesStatusLock.Lock()
defer servicesStatusLock.Unlock()
//单个节点挂了告警
if oldService, ok := servicesStatus[service.Name]; ok {
for _, node := range service.NodeMap {
if oldNode, ok := oldService.NodeMap[node.Id]; ok {
if oldNode.Status == Passing && node.Status == Critical {
logger.Warning.Print(service.Name, " ", node.Id, "---!!!node critical!!!---")
_ = dingding.SenderDingDing(service.Name+" "+node.Id+" "+"test:---!!!node critical!!!---", dingding.DefaultDingURL)
}
}
}
}
//整个服务挂了告警
//如果 服务存在,并且服务的old状态为passing,并且服务的now状态为critical,则报警,否则记录服务状态
serviceString := serviceStr(service)
if oldService, ok := servicesStatus[service.Name]; ok && serviceStatus(oldService) && !serviceStatus(service) {
logger.Warning.Print(serviceString, "---!!!service critical!!!---")
_ = dingding.SenderDingDing(serviceString+"---!!!service critical!!!---", dingding.DefaultDingURL)
handler(service.Name) handler(service.Name)
} else { } else {
logger.Info.Print(service.Name, " ", nodeStr) logger.Info.Print(serviceString)
} }
//更新服务状态 //更新服务状态
if serviceOK { //深拷贝对象
servicesStatus.Store(service.Name, Passing) newService := registry.NewService(service.Name)
} else { for kk, vv := range service.NodeMap {
servicesStatus.Store(service.Name, Critical) newNode := registry.Node{
ServiceName:vv.ServiceName,
Id:vv.Id,
Port:vv.Port,
Address:vv.Address,
Status:vv.Status,
}
for x, y := range vv.Meta {
newNode.Meta[x] = y
}
newService.NodeMap[kk] = &newNode
} }
servicesStatus[service.Name] = newService
} }
func (w watch) AddNode(node *registry.Node) { func (w watch) AddNode(node *registry.Node) {
...@@ -102,26 +141,12 @@ func (w watch) DelNode(node *registry.Node) { ...@@ -102,26 +141,12 @@ func (w watch) DelNode(node *registry.Node) {
} }
var ( var (
servicesStatus = new(sync.Map) servicesStatus = make(map[string]*registry.Service)
servicesStatusLock = new(sync.Mutex)
) )
func InitServiceStatus() { func InitServiceStatus() {
services := registry.GetServiceMap() servicesStatus = registry.GetServiceMap()
for _, service := range services {
serviceOK := false
for _, node := range service.NodeMap {
if node.Status == Passing {
serviceOK = true
break
}
}
if serviceOK {
servicesStatus.Store(service.Name, Passing)
} else {
servicesStatus.Store(service.Name, Critical)
}
}
} }
func NodeCheck() { func NodeCheck() {
...@@ -129,7 +154,7 @@ func NodeCheck() { ...@@ -129,7 +154,7 @@ func NodeCheck() {
if e := recover(); e != nil { if e := recover(); e != nil {
logger.Info.Print("node check panic: ", e) logger.Info.Print("node check panic: ", e)
_ = dingding.SenderDingDing("node check panic!", dingding.DefaultDingURL) _ = dingding.SenderDingDing("node check panic!", dingding.DefaultDingURL)
time.Sleep(time.Second*1) time.Sleep(time.Second * 1)
NodeCheck() NodeCheck()
} }
}() }()
......
...@@ -264,9 +264,18 @@ func (cr *consulRegistry) GetService(serviceName string) (*Service, error) { ...@@ -264,9 +264,18 @@ func (cr *consulRegistry) GetService(serviceName string) (*Service, error) {
//深拷贝 //深拷贝
if v, ok := cr.serviceMap[serviceName]; ok { if v, ok := cr.serviceMap[serviceName]; ok {
rtn := NewService(serviceName) rtn := NewService(serviceName)
rtn.TagMap = v.TagMap
for kk, vv := range v.NodeMap { for kk, vv := range v.NodeMap {
rtn.NodeMap[kk] = vv newNode := Node{
ServiceName:vv.ServiceName,
Id:vv.Id,
Port:vv.Port,
Address:vv.Address,
Status:vv.Status,
}
for x, y := range vv.Meta {
newNode.Meta[x] = y
}
rtn.NodeMap[kk] = &newNode
} }
return rtn, nil return rtn, nil
} else { } else {
...@@ -283,7 +292,22 @@ func (cr *consulRegistry) SetObserver(name string, observer Observer) error { ...@@ -283,7 +292,22 @@ func (cr *consulRegistry) SetObserver(name string, observer Observer) error {
return errors.New("observer is exists") return errors.New("observer is exists")
} else { } else {
for _, service := range cr.serviceMap { for _, service := range cr.serviceMap {
observer.UpdateNodes(service) //深拷贝对象
newService := NewService(service.Name)
for kk, vv := range service.NodeMap {
newNode := Node{
ServiceName:vv.ServiceName,
Id:vv.Id,
Port:vv.Port,
Address:vv.Address,
Status:vv.Status,
}
for x, y := range vv.Meta {
newNode.Meta[x] = y
}
newService.NodeMap[kk] = &newNode
}
observer.UpdateNodes(newService)
} }
cr.observerMap.Store(name, observer) cr.observerMap.Store(name, observer)
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment