Commit 463e2b06 authored by jingbo.wang

总表完成

parent f7a265d6
......@@ -6,7 +6,6 @@ import (
"git.quantgroup.cn/DevOps/enoch/pkg/global"
"git.quantgroup.cn/DevOps/enoch/pkg/glog"
"git.quantgroup.cn/DevOps/enoch/pkg/points"
"git.quantgroup.cn/DevOps/enoch/pkg/report-form"
"github.com/Shopify/sarama"
_ "github.com/mkevac/debugcharts"
"net/http"
......@@ -112,7 +111,7 @@ func main() {
dao.DbInit()
//健康状态报表
report_form.RegularReport(global.ReportFormDir)
// report_form.RegularReport(global.ReportFormDir)
//TODO 告警策略
}
......
......@@ -18,7 +18,7 @@ func SendEmail(title string, content string, receiver ...string) {
//m.SetHeader("To", []string{receiver})
m.SetHeader("To", receiver...)
m.SetHeader("Subject", title)
m.SetBody("text/html", "<div><pre>"+content+"</pre></div>")
m.SetBody("text/html", content)
userName := "program@quantgroup.cn"
pwd := "Fuck147999!!!"
......
......@@ -24,7 +24,7 @@ type Path struct {
averageDuration time.Duration //平均响应时间
sumDuration time.Duration //总响应时间
medianDuration time.Duration //中位响应时间
qps int //每秒的访问量
qps float64 //每秒的访问量
variance float64 //标准差
maxDurationTracePoint TracePoint //最大响应时间的TracePoint
}
......@@ -53,7 +53,7 @@ func (p *Path) initQps() {
if p.startTime.Unix() >= p.endTime.Unix() {
return
}
p.qps = p.count / int(p.endTime.Sub(p.startTime).Seconds())
p.qps = float64(p.count) / p.endTime.Sub(p.startTime).Seconds()
}
//初始化访问次数
......@@ -193,16 +193,16 @@ func (p *Path) GetCount() int {
return p.count
}
// GetQps returns the queries-per-second value stored in p.qps.
func (p *Path) GetQps() float64 {
return p.qps
}
// GetSumDuration returns the total response time stored in p.sumDuration.
func (p *Path) GetSumDuration() time.Duration {
return p.sumDuration
}
//获取qps
func (p *Path) GetQps() int {
return p.qps
}
//获取平均响应时间
func (p *Path) GetAverageDuration() time.Duration {
return p.averageDuration
......
package report_form
import (
"encoding/json"
"git.quantgroup.cn/DevOps/enoch/pkg/email"
"git.quantgroup.cn/DevOps/enoch/pkg/global"
"git.quantgroup.cn/DevOps/enoch/pkg/glog"
"github.com/vrg0/go-common/util"
"io/ioutil"
"runtime/debug"
"time"
)
/*
var (
receiverList = make([]string, 0)
serviceOwner = make(map[string][]string)
......@@ -68,7 +58,7 @@ func run(now time.Time, dir string) {
fileNamePrefix := now.Format("2006-01-02")
//周表
if now.Weekday() == time.Monday {
if now.Weekday() == time.Tuesday {
sm := GeneralTableNewSM(7)
//总表
......@@ -103,3 +93,4 @@ func run(now time.Time, dir string) {
}
}
}
*/
package report_form
import "testing"
/*
func TestRegularReport(t *testing.T) {
RegularReport("/var/enoch_health_table")
select {}
}
*/
......@@ -3,117 +3,314 @@ package report_form
import (
"errors"
"fmt"
"math"
"strings"
"time"
"unicode/utf8"
)
type Table struct {
name string
keyList []string
maxLenValueList []int
recordList [][]string
}
func NewTable(name string, keys ...string) *Table {
maxLenValueList := make([]int, len(keys))
for i, key := range keys {
maxLenValueList[i] = strFormatLen(key)
}
// Formatting constants and ranking thresholds used when building the report.
const (
// timeFormat is the layout used to print trace-point timestamps.
timeFormat = "2006-01-02T15:04:05"
// colorRed and colorBlack are the font colors handed to Table.AddRecord;
// red is used for rows that exceed the threshold of their ranking.
colorRed = "red"
colorBlack = "black"
// cpuMin, memMin and diskMin are the lowest usage values (printed as
// percentages) a service may have and still be listed in the CPU, memory
// and disk rankings; each ranking loop stops at the first service below
// its minimum.
cpuMin = 1
memMin = 1
diskMin = 1
)
return &Table{
name: name,
keyList: keys,
recordList: make([][]string, 0),
maxLenValueList: maxLenValueList,
}
}
var (
// cstZone is a fixed UTC+8 zone labelled "CST"; report timestamps are
// converted to it before being formatted.
cstZone = time.FixedZone("CST", 8*3600)
// maxDuration is the response-time threshold (600ms) above which a
// response-time ranking row is colored red.
maxDuration = time.Millisecond * 600
)
func strFormatLen(s string) int {
l := 0
for i := 0; i < len(s); {
c, size := utf8.DecodeRuneInString(s[i:])
if size != 1 && c != 'µ' {
l += 2
} else {
l++
}
i += size
// hrn ("human-readable number") formats n using base-1024 unit suffixes.
//
// Values below 1024, including zero and negative numbers, are printed as a
// plain integer. Larger values are divided by the matching power of 1024 and
// printed with three decimals followed by one of k, M, G, T, P or E.
func hrn(n int) string {
	const (
		unit     = 1024
		prefixes = "kMGTPE"
	)
	if n < unit {
		return fmt.Sprintf("%d", n)
	}

	// Derive the exponent with integer division. Taking
	// int(math.Log(n)/math.Log(1024)) relies on a floating-point ratio that
	// may land just below a whole number at exact powers of 1024 and would
	// then pick a suffix one step too small.
	exp := 1
	for rest := n / unit; rest >= unit && exp < len(prefixes); rest /= unit {
		exp++
	}

	scaled := float64(n) / math.Pow(unit, float64(exp))
	return fmt.Sprintf("%.3f%c", scaled, prefixes[exp-1])
}
func (t *Table) ToString() string {
const spacing = 4
//总表
func ServiceMapReportForm(startTime time.Time, endTime time.Time) string {
rtn := new(strings.Builder)
sm := NewServiceMap(startTime, endTime)
var t *Table = nil
//标题
totalLength := spacing * (len(t.maxLenValueList) - 1)
for _, valueLen := range t.maxLenValueList {
totalLength += valueLen
//表头
rtn.WriteString("<div><pre>\n")
rtn.WriteString(" 服务健康状态总表\n")
rtn.WriteString(fmt.Sprintf("时间:%s ~ %s\n",
startTime.In(cstZone).Format(time.RFC3339),
endTime.In(cstZone).Format(time.RFC3339)),
)
rtn.WriteString(fmt.Sprintf("总访问量:%s 总响应时间:%s 平均响应时间:%s QPS:%.2f\n",
hrn(sm.GetCount()),
sm.GetSumDuration()/1e6*1e6, //精度保留到毫秒
sm.GetAverageDuration()/1e6*1e6, //精度保留到毫秒
sm.GetQps()),
)
rtn.WriteString("\n")
rtn.WriteString("阅读说明:\n")
rtn.WriteString("1、易读数字:单位K、M、G、T、P、E,进制1024\n")
rtn.WriteString("2、排名:排名越高健康状态越差,如果排名标红,则表示存在风险\n")
rtn.WriteString("3、CPU、内存、磁盘:服务有多个节点,总表中CPU、内存、磁盘取的是服务节点中的最高值\n")
rtn.WriteString("4、常规cpu使用率:去针尖后的最高CPU使用率,针尖指的是CPU使用率暴增的场景\n")
rtn.WriteString("\n\n")
//服务平均响应时间排行
t = NewTable("服务平均响应时间排行", "排名", "响应时间", "服务名称", "访问量", "QPS")
for i, s := range sm.GetAverageDurationServiceList() {
color := colorBlack
if s.GetAverageDuration() > maxDuration {
color = colorRed
}
_ = t.AddRecord(
color,
fmt.Sprintf("No.%d", i+1),
fmt.Sprintf("%v", s.GetAverageDuration()/1e6*1e6), //精度毫秒
s.Name(),
hrn(s.GetCount()),
fmt.Sprintf("%.2f", s.GetQps()),
)
}
rtn.WriteString(t.ToHtml())
rtn.WriteString("\n\n")
//服务接口响应时间中位值排行
t = NewTable("服务接口中位响应时间排行", "排名", "响应时间", "服务名称", "接口", "访问量", "QPS")
for i, p := range sm.GetMedianDurationPathList() {
color := colorBlack
if p.GetMedianDuration() > maxDuration {
color = colorRed
}
_ = t.AddRecord(
color,
fmt.Sprintf("No.%d", i+1),
fmt.Sprintf("%v", p.GetMedianDuration()/1e6*1e6), //精度保留毫秒
p.GetServiceName(),
p.GetPath(),
hrn(p.GetCount()),
fmt.Sprintf("%.2f", p.GetQps()),
)
}
preBlankLen := (totalLength - strFormatLen(t.name)) / 2
postBlankLen := totalLength - preBlankLen - strFormatLen(t.name)
rtn.WriteString(t.ToHtml())
rtn.WriteString("\n\n")
//服务接口响应时间平均值排行
t = NewTable("服务接口平均响应时间排行", "排名", "响应时间", "服务名称", "接口", "访问量", "QPS")
for i, p := range sm.GetAverageDurationPathList() {
color := colorBlack
if p.GetAverageDuration() > maxDuration {
color = colorRed
}
_ = t.AddRecord(
color,
fmt.Sprintf("No.%d", i+1),
fmt.Sprintf("%v", p.GetAverageDuration()/1e6*1e6), //精度保留毫秒
p.GetServiceName(),
p.GetPath(),
hrn(p.GetCount()),
fmt.Sprintf("%.2f", p.GetQps()),
)
}
rtn.WriteString(t.ToHtml())
rtn.WriteString("\n\n")
//响应时间最高的请求排行
t = NewTable("响应时间最高的请求排行", "排名", "响应时间", "服务名称", "接口", "时间戳", "trace_id")
for i, tp := range sm.GetMaxDurationTracePointList() {
color := colorBlack
if tp.Duration > maxDuration {
color = colorRed
}
_ = t.AddRecord(
color,
fmt.Sprintf("No.%d", i+1),
fmt.Sprintf("%v", tp.Duration/1e6*1e6), //精度保留毫秒
tp.ServiceName,
tp.Path,
tp.Timestamp.In(cstZone).Format(timeFormat),
tp.TraceId,
)
}
rtn.WriteString(t.ToHtml())
rtn.WriteString("\n\n")
for i := 0; i < preBlankLen; i++ {
rtn.WriteString("-")
//常规cpu使用率排行
t = NewTable("常规cpu使用率排行", "排名", "cpu使用率", "服务名称", "访问量", "平均响应时间", "QPS")
for i, s := range sm.GetRemoveN100MaxCpuServiceList() {
if s.GetRemoveN100MaxCpu() < cpuMin {
break
}
color := colorBlack
if s.GetRemoveN100MaxCpu() > 50 { //超过一半标红
color = colorRed
}
_ = t.AddRecord(
color,
fmt.Sprintf("No.%d", i+1),
fmt.Sprintf("%d%%", s.GetRemoveN100MaxCpu()),
s.Name(),
hrn(s.GetCount()),
fmt.Sprintf("%v", s.GetAverageDuration()/1e6*1e6),
fmt.Sprintf("%.2f", s.GetQps()),
)
}
rtn.WriteString(t.name)
for i := 0; i < postBlankLen; i++ {
rtn.WriteString("-")
rtn.WriteString(t.ToHtml())
rtn.WriteString("\n\n")
//峰值cpu使用率排行
t = NewTable("峰值cpu使用率排行", "排名", "cpu使用率", "服务名称", "访问量", "平均响应时间", "QPS")
for i, s := range sm.GetMaxCpuServiceList() {
if s.GetMaxCpu() < cpuMin {
break
}
color := colorBlack
if s.GetMaxCpu() > 80 { //超过80标红
color = colorRed
}
_ = t.AddRecord(
color,
fmt.Sprintf("No.%d", i+1),
fmt.Sprintf("%d%%", s.GetMaxCpu()),
s.Name(),
hrn(s.GetCount()),
fmt.Sprintf("%v", s.GetAverageDuration()/1e6*1e6),
fmt.Sprintf("%.2f", s.GetQps()),
)
}
rtn.WriteString("\n")
rtn.WriteString(t.ToHtml())
rtn.WriteString("\n\n")
//key
for i, key := range t.keyList {
rtn.WriteString(key)
//平均内存使用率排行
t = NewTable("平均内存使用率排行", "排名", "内存使用率", "服务名称", "访问量", "平均响应时间", "QPS")
for i, s := range sm.GetAverageMemServiceList() {
if s.GetAverageMem() < memMin {
break
}
color := colorBlack
if s.GetAverageMem() > 50 { //超过50标红
color = colorRed
}
_ = t.AddRecord(
color,
fmt.Sprintf("No.%d", i+1),
fmt.Sprintf("%d%%", s.GetAverageMem()),
s.Name(),
hrn(s.GetCount()),
fmt.Sprintf("%v", s.GetAverageDuration()/1e6*1e6),
fmt.Sprintf("%.2f", s.GetQps()),
)
}
rtn.WriteString(t.ToHtml())
rtn.WriteString("\n\n")
//补全空格,最后一行结尾不打印分隔空白
blankLen := t.maxLenValueList[i] - strFormatLen(key) + spacing
if i == len(t.keyList)-1 {
blankLen -= spacing
//峰值内存使用率排行
t = NewTable("峰值内存使用率排行", "排名", "内存使用率", "服务名称", "访问量", "平均响应时间", "QPS")
for i, s := range sm.GetMaxMemServiceList() {
if s.GetMaxMem() < memMin {
break
}
for i := 0; i < blankLen; i++ {
rtn.WriteString(" ")
color := colorBlack
if s.GetMaxMem() > 80 { //超过80标红
color = colorRed
}
_ = t.AddRecord(
color,
fmt.Sprintf("No.%d", i+1),
fmt.Sprintf("%d%%", s.GetMaxMem()),
s.Name(),
hrn(s.GetCount()),
fmt.Sprintf("%v", s.GetAverageDuration()/1e6*1e6),
fmt.Sprintf("%.2f", s.GetQps()),
)
}
rtn.WriteString("\n")
rtn.WriteString(t.ToHtml())
rtn.WriteString("\n\n")
//value
for _, valueList := range t.recordList {
for i, value := range valueList {
rtn.WriteString(value)
blankLen := t.maxLenValueList[i] - strFormatLen(value) + spacing
if i == len(t.keyList)-1 {
blankLen -= spacing
}
for i := 0; i < blankLen; i++ {
rtn.WriteString(" ")
}
//峰值磁盘使用率排行
t = NewTable("峰值硬盘使用率排行", "排名", "硬盘使用率", "服务名称")
for i, s := range sm.GetMaxDiskServiceList() {
if s.GetMaxDisk() < diskMin {
break
}
color := colorBlack
if s.GetMaxDisk() > 80 {
color = colorRed
}
rtn.WriteString("\n")
_ = t.AddRecord(
color,
fmt.Sprintf("No.%d", i+1),
fmt.Sprintf("%d%%", s.GetMaxDisk()),
s.Name(),
)
}
rtn.WriteString(t.ToHtml())
rtn.WriteString("\n\n")
rtn.WriteString("</pre></div>\n")
return rtn.String()
}
// Line is one data row of a Table: the font color applied to every cell of
// the row and the cell texts in column order.
type Line struct {
	color  string
	values []string
}

// Table is a named list of rows with a fixed set of column headers that can
// be rendered as an HTML <table>.
type Table struct {
	name       string   // caption of the table
	keyList    []string // column headers
	recordList []Line   // rows, in insertion order
}

// NewTable creates an empty table with the given caption and column headers.
func NewTable(name string, keys ...string) *Table {
	return &Table{
		name:       name,
		keyList:    keys,
		recordList: make([]Line, 0),
	}
}

// AddRecord appends one row rendered in the given font color.
//
// The number of values must equal the number of column headers; otherwise an
// error is returned and the table is left unchanged. The values are copied,
// so the caller may reuse its slice afterwards.
func (t *Table) AddRecord(color string, values ...string) error {
	if len(values) != len(t.keyList) {
		return errors.New("len(values) != len(keyList)")
	}

	row := make([]string, len(values))
	copy(row, values)
	t.recordList = append(t.recordList, Line{color: color, values: row})
	return nil
}

// ToHtml renders the table as an HTML <table> element: an inline <style>
// rule for column spacing, a <caption> holding the table name, one header
// row, and one row per record with every cell wrapped in <font color="...">.
//
// Caption, headers, colors and cell texts are HTML-escaped, because cell
// texts such as request paths and trace ids are not under the report's
// control and must not be able to inject markup.
func (t *Table) ToHtml() string {
	// Same character set as html.EscapeString; built from the strings
	// package so that no additional import is required.
	escape := strings.NewReplacer(
		"&", "&amp;",
		"<", "&lt;",
		">", "&gt;",
		`"`, "&#34;",
		"'", "&#39;",
	).Replace

	var b strings.Builder
	b.WriteString("<table>\n")
	b.WriteString("\t<style>table{border-spacing: 20px 0px}</style>\n")
	fmt.Fprintf(&b, "\t<caption>%s</caption>\n", escape(t.name))

	b.WriteString("\t<tr>")
	for _, key := range t.keyList {
		fmt.Fprintf(&b, "<td>%s</td>", escape(key))
	}
	b.WriteString("</tr>\n")

	for _, rec := range t.recordList {
		color := escape(rec.color)
		b.WriteString("\t<tr>")
		for _, cell := range rec.values {
			fmt.Fprintf(&b, "<td><font color=\"%s\">%s</font></td>", color, escape(cell))
		}
		b.WriteString("</tr>\n")
	}

	b.WriteString("</table>\n")
	return b.String()
}
/*
//添加记录
t.recordList = append(t.recordList, valueList)
return nil
}
......@@ -170,115 +367,6 @@ func SubTableRun(s *Service) string {
return rtn.String()
}
//总表报表
func GeneralTableRun(sm *ServiceMap) string {
const (
cpuMin = 1
memMin = 1
diskMin = 1
)
//生成报表
rtn := new(strings.Builder)
//表头
rtn.WriteString(" 服务健康状态总表\n")
//时间
rtn.WriteString(fmt.Sprintf("时间:%s ~ %s\n",
sm.startTime.Format(time.RFC3339), sm.endTime.Format(time.RFC3339)))
//访问量
rtn.WriteString(fmt.Sprintf("总访问量:%v 总响应时间:%v 平均响应时间:%v\n\n",
sm.GetCount(), sm.GetSumDuration(), sm.GetAverageDuration()))
//服务平均响应时间排行
serviceTable := NewTable("服务平均响应时间排行", "响应时间", "服务名称", "访问量")
serviceList := sm.GetAverageDurationServiceList()
for _, s := range serviceList {
_ = serviceTable.AddRecord(s.GetAverageDuration(), s.Name(), s.GetCount())
}
rtn.WriteString(serviceTable.ToString())
rtn.WriteString("\n")
//服务响应时间中位值排行
medianTable := NewTable("服务接口中位响应时间排行", "响应时间", "服务名称", "接口", "访问量")
medianList := sm.GetMedianDurationPathList()
for _, m := range medianList {
_ = medianTable.AddRecord(m.GetMedianDuration(), m.GetServiceName(), m.GetPath(), m.GetCount())
}
rtn.WriteString(medianTable.ToString())
rtn.WriteString("\n")
//服务响应时间平均值排行
averageTable := NewTable("服务接口平均响应时间排行", "响应时间", "服务名称", "接口", "访问量")
averageList := sm.GetAverageDurationPathList()
for _, m := range averageList {
_ = averageTable.AddRecord(m.GetAverageDuration(), m.GetServiceName(), m.GetPath(), m.GetCount())
}
rtn.WriteString(averageTable.ToString())
rtn.WriteString("\n")
//响应时间最长的请求排行
maxTable := NewTable("服务接口最高响应时间排行", "响应时间", "服务名称", "接口", "时间戳", "trace_id")
maxList := sm.GetMaxDurationTracePointList()
for _, m := range maxList {
_ = maxTable.AddRecord(m.Duration, m.ServiceName, m.Path, m.Timestamp, m.TraceId)
}
rtn.WriteString(maxTable.ToString())
rtn.WriteString("\n")
//去topN100后的峰值cpu使用率排行
maxRemoveN100CpuTable := NewTable("常规cpu使用率排行", "cpu使用率", "服务名称", "访问量", "平均响应时间")
maxRemoveN100CpuList := sm.GetRemoveN100MaxCpuServiceList()
for _, s := range maxRemoveN100CpuList {
if s.GetRemoveN100MaxCpu() < cpuMin {
break
}
_ = maxRemoveN100CpuTable.AddRecord(s.GetRemoveN100MaxCpu(), s.Name(), s.GetCount(), s.GetAverageDuration())
}
rtn.WriteString(maxRemoveN100CpuTable.ToString())
rtn.WriteString("\n")
//峰值cpu使用率排行
maxCpuTable := NewTable("峰值cpu使用率排行", "cpu使用率", "服务名称", "访问量", "平均响应时间")
maxCpuList := sm.GetMaxCpuServiceList()
for _, s := range maxCpuList {
if s.GetMaxCpu() < cpuMin {
break
}
_ = maxCpuTable.AddRecord(s.GetMaxCpu(), s.Name(), s.GetCount(), s.GetAverageDuration())
}
rtn.WriteString(maxCpuTable.ToString())
rtn.WriteString("\n")
//内存峰值使用率排行
memTable := NewTable("峰值内存使用率排行", "内存使用率", "服务名称", "访问量", "平均响应时间")
memList := sm.GetMaxMemServiceList()
for _, s := range memList {
if s.GetMaxMem() < memMin {
break
}
_ = memTable.AddRecord(s.GetMaxMem(), s.Name(), s.GetCount(), s.GetAverageDuration())
}
rtn.WriteString(memTable.ToString())
rtn.WriteString("\n")
//硬盘峰值使用率排行
diskTable := NewTable("峰值硬盘使用率排行", "硬盘使用率", "服务名称", "访问量", "平均响应时间")
diskList := sm.GetMaxDiskServiceList()
for _, s := range diskList {
if s.GetMaxDisk() < diskMin {
break
}
_ = diskTable.AddRecord(s.GetMaxDisk(), s.Name(), s.GetCount(), s.GetAverageDuration())
}
rtn.WriteString(diskTable.ToString())
rtn.WriteString("\n")
return rtn.String()
}
func GeneralTableNewSM(n int) *ServiceMap {
//求前n天的数据,按天取整
now := time.Now()
......@@ -294,3 +382,4 @@ func GeneralTableNewSM(n int) *ServiceMap {
return sm
}
*/
......@@ -3,9 +3,15 @@ package report_form
import (
"fmt"
"testing"
"unicode/utf8"
"time"
)
// TestRun is a manual smoke test: it builds the overall service report for
// the last ten days up to now and prints it. It asserts nothing.
func TestRun(t *testing.T) {
	windowStart := time.Now().AddDate(0, 0, -10)
	windowEnd := time.Now()
	fmt.Println(ServiceMapReportForm(windowStart, windowEnd))
}
/*
func TestRun(t *testing.T) {
sm := GeneralTableNewSM(7)
fmt.Println(GeneralTableRun(sm))
......@@ -19,3 +25,4 @@ func TestUtf8StringLen(t *testing.T) {
i += size
}
}
*/
......@@ -24,7 +24,7 @@ type Service struct {
maxMedianPath *Path //最大中位响应时间path
maxAveragePath *Path //最大平均响应时间path
maxDurationTracePoint *TracePoint //最大响应时间的tracePoint
qps int
qps float64 //QPS
//node相关
nodeMap map[string]*Node //节点列表
}
......@@ -225,7 +225,7 @@ func (s *Service) GetAverageDurationPathList() []*Path {
}
//获取qps
func (s *Service) GetQps() int {
func (s *Service) GetQps() float64 {
return s.qps
}
......@@ -295,18 +295,20 @@ func (s *Service) GetMaxDurationTracePoint() (*TracePoint, bool) {
return s.maxDurationTracePoint, s.maxDurationTracePoint != nil
}
// getNodeMapValue applies f to every node in s.nodeMap and returns the
// largest result. It returns 0 when the service has no nodes; because the
// running maximum starts at 0, results that are all zero or negative also
// yield 0.
//
// f is invoked exactly once per node.
func (s *Service) getNodeMapValue(f func(node *Node) int) int {
	highest := 0
	for _, node := range s.nodeMap {
		if v := f(node); v > highest {
			highest = v
		}
	}
	return highest
}
func (s *Service) GetAverageCpu() int {
......
......@@ -126,6 +126,23 @@ func (sm *ServiceMap) GetMaxMemServiceList() []*Service {
return rtn
}
// GetAverageMemServiceList returns every service in sm.serviceMap, ordered
// from the highest GetAverageMem value to the lowest.
func (sm *ServiceMap) GetAverageMemServiceList() []*Service {
	services := make([]*Service, 0)
	for _, svc := range sm.serviceMap {
		services = append(services, svc)
	}

	// Descending order: a service sorts first when its average memory is larger.
	sort.Slice(services, func(a, b int) bool {
		return services[a].GetAverageMem() > services[b].GetAverageMem()
	})

	return services
}
//获取峰值磁盘使用率列表
func (sm *ServiceMap) GetMaxDiskServiceList() []*Service {
rtn := make([]*Service, 0)
......@@ -144,8 +161,8 @@ func (sm *ServiceMap) GetMaxDiskServiceList() []*Service {
}
//获取qps
func (sm *ServiceMap) GetQps() int {
rtn := 0
func (sm *ServiceMap) GetQps() float64 {
rtn := float64(0)
for _, s := range sm.serviceMap {
rtn += s.GetQps()
}
......@@ -245,6 +262,11 @@ func (sm *ServiceMap) GetCount() int {
return sum
}
// GetServiceMap returns the string-keyed map held in sm.serviceMap. The map
// itself is returned, not a copy.
func (sm *ServiceMap) GetServiceMap() map[string]*Service {
return sm.serviceMap
}
//获取总响应时间
func (sm *ServiceMap) GetSumDuration() time.Duration {
sum := time.Duration(0)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment