initial import

This commit is contained in:
kevin
2020-07-26 17:09:05 +08:00
commit 7e3a369a8f
647 changed files with 54754 additions and 0 deletions

70
core/stat/alert.go Normal file
View File

@@ -0,0 +1,70 @@
// +build linux
package stat
import (
"flag"
"fmt"
"strings"
"sync"
"sync/atomic"
"time"
"zero/core/executors"
"zero/core/proc"
"zero/core/sysx"
"zero/core/timex"
"zero/core/utils"
)
// Constants used when composing alert messages.
const (
// clusterNameKey is the key handed to proc.Env to obtain the cluster name.
clusterNameKey = "CLUSTER_NAME"
// testEnv is the flag name whose presence init uses to detect a test binary.
testEnv = "test.v"
// timeFormat is the layout of the timestamp written as the first line of an alert.
timeFormat = "2006-01-02 15:04:05"
)
// Package-level alert state shared by Report and SetReporter.
var (
// reporter delivers a composed alert; Report does nothing while it is nil.
reporter = utils.Report
// lock guards reads and writes of reporter.
lock sync.RWMutex
// lessExecutor decides whether an alert is sent or discarded. It is built
// with a five-minute argument (presumably the minimum interval between two
// sends; confirm against executors.NewLessExecutor).
lessExecutor = executors.NewLessExecutor(time.Minute * 5)
// dropped counts alerts discarded since the last delivered one; it is only
// touched through sync/atomic.
dropped int32
// clusterName is resolved once, at package initialization.
clusterName = proc.Env(clusterNameKey)
)
// init switches alert reporting off when the "test.v" flag is registered,
// i.e. when the package believes it is running inside a test binary.
//
// NOTE(review): since Go 1.13 the testing flags are registered by
// testing.Init, which the generated test main calls after package init
// functions have run, so this lookup may never succeed on modern toolchains.
// Confirm before relying on it.
func init() {
	if underTest := flag.Lookup(testEnv) != nil; underTest {
		SetReporter(nil)
	}
}
// Report composes an alert from msg and passes it to the configured reporter.
//
// The alert contains, one item per line: the current time, the cluster name
// (when non-empty), the host name, the number of alerts dropped since the
// last delivered one (when positive) and finally msg with surrounding
// whitespace trimmed. Delivery is gated by lessExecutor; when it discards the
// call the dropped counter is incremented instead. Nothing happens while no
// reporter is set.
func Report(msg string) {
	// Take a snapshot of the reporter so the lock is not held while sending.
	lock.RLock()
	send := reporter
	lock.RUnlock()

	if send == nil {
		return
	}

	compose := func() {
		var sb strings.Builder
		fmt.Fprintf(&sb, "%s\n", timex.Time().Format(timeFormat))
		if len(clusterName) > 0 {
			fmt.Fprintf(&sb, "cluster: %s\n", clusterName)
		}
		fmt.Fprintf(&sb, "host: %s\n", sysx.Hostname())
		// Reading the counter also resets it, so each drop is reported once.
		if lost := atomic.SwapInt32(&dropped, 0); lost > 0 {
			fmt.Fprintf(&sb, "dropped: %d\n", lost)
		}
		sb.WriteString(strings.TrimSpace(msg))
		send(sb.String())
	}

	if sent := lessExecutor.DoOrDiscard(compose); !sent {
		atomic.AddInt32(&dropped, 1)
	}
}
// SetReporter replaces the function used by Report to deliver alerts.
// Passing nil disables reporting.
func SetReporter(fn func(string)) {
	lock.Lock()
	reporter = fn
	lock.Unlock()
}

View File

@@ -0,0 +1,9 @@
// +build !linux
package stat
// Report is a no-op on platforms other than linux.
func Report(_ string) {
	// Intentionally empty: alerting is only implemented for linux builds.
}
// SetReporter is a no-op on platforms other than linux.
func SetReporter(_ func(string)) {
	// Intentionally empty: there is no reporter to configure on this platform.
}

View File

@@ -0,0 +1,168 @@
package internal
import (
"fmt"
"os"
"path"
"strconv"
"strings"
"zero/core/iox"
"zero/core/lang"
)
// cgroupDir is the directory under which per-controller cgroup paths are built.
const cgroupDir = "/sys/fs/cgroup"
// cgroup maps cgroup controller names (the methods look up "cpu", "cpuacct"
// and "cpuset") to the directory holding that controller's files.
type cgroup struct {
// cgroups is keyed by controller name; currentCgroup fills it with paths
// under cgroupDir.
cgroups map[string]string
}
// acctUsageAllCpus reads the cpuacct.usage file of the "cpuacct" controller
// and returns its content as an unsigned integer.
func (c *cgroup) acctUsageAllCpus() (uint64, error) {
	usageFile := path.Join(c.cgroups["cpuacct"], "cpuacct.usage")
	content, err := iox.ReadText(usageFile)
	if err != nil {
		return 0, err
	}

	return parseUint(string(content))
}
// acctUsagePerCpu reads cpuacct.usage_percpu of the "cpuacct" controller and
// returns one value per whitespace-separated field, in file order. The first
// field that fails to parse aborts the call with that error.
func (c *cgroup) acctUsagePerCpu() ([]uint64, error) {
	perCPUFile := path.Join(c.cgroups["cpuacct"], "cpuacct.usage_percpu")
	content, err := iox.ReadText(perCPUFile)
	if err != nil {
		return nil, err
	}

	var perCPU []uint64
	columns := strings.Fields(string(content))
	for i := range columns {
		value, err := parseUint(columns[i])
		if err != nil {
			return nil, err
		}
		perCPU = append(perCPU, value)
	}

	return perCPU, nil
}
// cpuQuotaUs reads cpu.cfs_quota_us of the "cpu" controller as a signed
// integer; the package's init treats -1 as "no quota".
func (c *cgroup) cpuQuotaUs() (int64, error) {
	quotaFile := path.Join(c.cgroups["cpu"], "cpu.cfs_quota_us")
	content, err := iox.ReadText(quotaFile)
	if err != nil {
		return 0, err
	}

	return strconv.ParseInt(string(content), 10, 64)
}
// cpuPeriodUs reads cpu.cfs_period_us of the "cpu" controller and returns
// its content as an unsigned integer.
func (c *cgroup) cpuPeriodUs() (uint64, error) {
	periodFile := path.Join(c.cgroups["cpu"], "cpu.cfs_period_us")
	content, err := iox.ReadText(periodFile)
	if err != nil {
		return 0, err
	}

	return parseUint(string(content))
}
// cpus reads cpuset.cpus of the "cpuset" controller and expands its
// comma/range list (for example "0-2,7") into individual CPU numbers.
func (c *cgroup) cpus() ([]uint64, error) {
	cpusetFile := path.Join(c.cgroups["cpuset"], "cpuset.cpus")
	content, err := iox.ReadText(cpusetFile)
	if err != nil {
		return nil, err
	}

	return parseUints(string(content))
}
// currentCgroup parses /proc/<pid>/cgroup of the running process and returns
// the directories of its cpu-related cgroup controllers.
//
// Every non-blank line must have the form "id:controllers:path". Only lines
// whose controller list starts with "cpu" are kept. The full list (for
// example "cpu,cpuacct") and each comma-separated controller in it are mapped
// to cgroupDir/<name>. A line with fewer than three fields yields an error.
func currentCgroup() (*cgroup, error) {
	cgroupFile := fmt.Sprintf("/proc/%d/cgroup", os.Getpid())
	lines, err := iox.ReadTextLines(cgroupFile, iox.WithoutBlank())
	if err != nil {
		return nil, err
	}

	cgroups := make(map[string]string)
	for _, line := range lines {
		// Split into at most three fields: the third one is a path and may
		// itself contain ':' characters, which must not invalidate the line.
		cols := strings.SplitN(line, ":", 3)
		if len(cols) != 3 {
			return nil, fmt.Errorf("invalid cgroup line: %s", line)
		}

		subsys := cols[1]
		// only read cpu stuff
		if !strings.HasPrefix(subsys, "cpu") {
			continue
		}

		// Register the combined name first, then every individual controller.
		// For a list without a comma the loop simply re-assigns the same entry.
		cgroups[subsys] = path.Join(cgroupDir, subsys)
		for _, k := range strings.Split(subsys, ",") {
			cgroups[k] = path.Join(cgroupDir, k)
		}
	}

	return &cgroup{
		cgroups: cgroups,
	}, nil
}
// parseUint parses s as a base-10 signed 64-bit integer and clamps the result
// to the unsigned range: negative values and values outside the int64 range
// both yield 0 with a nil error. Any other parse failure is reported as a
// "cgroup: bad int format" error.
func parseUint(s string) (uint64, error) {
	parsed, err := strconv.ParseInt(s, 10, 64)
	switch {
	case err == nil && parsed >= 0:
		return uint64(parsed), nil
	case err == nil:
		// Negative input saturates at zero.
		return 0, nil
	case err.(*strconv.NumError).Err == strconv.ErrRange:
		// Out-of-range input is tolerated and reported as zero.
		return 0, nil
	default:
		return 0, fmt.Errorf("cgroup: bad int format: %s", s)
	}
}
// parseUints expands a comma-separated list of numbers and inclusive
// "min-max" ranges (for example "0-2,7") into the set of distinct values it
// denotes. The order of the returned slice is unspecified because the values
// are collected through a map. An empty string yields (nil, nil); a malformed
// or descending range yields a "bad int list format" error.
func parseUints(val string) ([]uint64, error) {
	if len(val) == 0 {
		return nil, nil
	}

	seen := make(map[uint64]lang.PlaceholderType)
	for _, item := range strings.Split(val, ",") {
		if !strings.Contains(item, "-") {
			// Plain number.
			single, err := parseUint(item)
			if err != nil {
				return nil, err
			}
			seen[single] = lang.Placeholder
			continue
		}

		// Inclusive range "lo-hi".
		bounds := strings.SplitN(item, "-", 2)
		lo, err := parseUint(bounds[0])
		if err != nil {
			return nil, fmt.Errorf("cgroup: bad int list format: %s", val)
		}
		hi, err := parseUint(bounds[1])
		if err != nil {
			return nil, fmt.Errorf("cgroup: bad int list format: %s", val)
		}
		if hi < lo {
			return nil, fmt.Errorf("cgroup: bad int list format: %s", val)
		}
		for n := lo; n <= hi; n++ {
			seen[n] = lang.Placeholder
		}
	}

	var sets []uint64
	for value := range seen {
		sets = append(sets, value)
	}

	return sets, nil
}

View File

@@ -0,0 +1,148 @@
package internal
import (
"errors"
"fmt"
"strings"
"time"
"zero/core/iox"
"zero/core/lang"
)
const (
// cpuTicks is the divisor used when converting the tick counts read from
// /proc/stat into nanoseconds, i.e. the assumed number of ticks per second.
cpuTicks = 100
// cpuFields is the exclusive upper index of the columns summed from the
// aggregate "cpu" line of /proc/stat (columns 1 through 7).
cpuFields = 8
)
// Sampling state initialised by init and updated by RefreshCpu.
var (
// preSystem is the value of systemCpuUsage at the previous sample.
preSystem uint64
// preTotal is the value of totalCpuUsage at the previous sample.
preTotal uint64
// quota is the number of CPUs available to the process: the cpuset size,
// lowered to cfs_quota/cfs_period when that ratio is smaller.
quota float64
// cores is the number of entries found in cpuacct.usage_percpu.
cores uint64
)
// init captures the static CPU topology and the first usage sample.
//
// cores is the number of per-CPU usage entries and quota starts as the size
// of the cpuset. When a CFS quota can be read and is not -1, quota is lowered
// to quota/period if that is smaller. A failure to read the CFS quota is
// ignored; every other error is handed to lang.Must (presumably fatal;
// confirm against the lang package).
func init() {
	perCPU, err := perCpuUsage()
	lang.Must(err)
	cores = uint64(len(perCPU))

	cpuset, err := cpuSets()
	lang.Must(err)
	quota = float64(len(cpuset))

	if cfsQuota, quotaErr := cpuQuota(); quotaErr == nil && cfsQuota != -1 {
		cfsPeriod, periodErr := cpuPeriod()
		lang.Must(periodErr)

		if limit := float64(cfsQuota) / float64(cfsPeriod); limit < quota {
			quota = limit
		}
	}

	preSystem, err = systemCpuUsage()
	lang.Must(err)

	preTotal, err = totalCpuUsage()
	lang.Must(err)
}
// RefreshCpu samples the cgroup and host CPU counters and returns the CPU
// usage since the previous call, computed as
//
//	cgroupDelta * cores * 1000 / (systemDelta * quota)
//
// It returns 0 when either counter cannot be read, when either counter did
// not advance, or when quota is not positive. The stored previous sample is
// updated on every successful read, so the next call measures from this one.
func RefreshCpu() uint64 {
	total, err := totalCpuUsage()
	if err != nil {
		return 0
	}

	system, err := systemCpuUsage()
	if err != nil {
		return 0
	}

	var usage uint64
	// The counters are unsigned: compare before subtracting so that a counter
	// that moved backwards cannot wrap around into a huge bogus delta. The
	// quota check avoids dividing by zero when no CPU was reported as usable.
	if total > preTotal && system > preSystem && quota > 0 {
		cpuDelta := total - preTotal
		systemDelta := system - preSystem
		usage = uint64(float64(cpuDelta*cores*1e3) / (float64(systemDelta) * quota))
	}
	preSystem = system
	preTotal = total

	return usage
}
// cpuQuota returns the CFS quota (cpu.cfs_quota_us) of the current cgroup.
func cpuQuota() (int64, error) {
	current, err := currentCgroup()
	if err == nil {
		return current.cpuQuotaUs()
	}

	return 0, err
}
// cpuPeriod returns the CFS period (cpu.cfs_period_us) of the current cgroup.
func cpuPeriod() (uint64, error) {
	current, err := currentCgroup()
	if err == nil {
		return current.cpuPeriodUs()
	}

	return 0, err
}
// cpuSets returns the CPU numbers listed in cpuset.cpus of the current cgroup.
func cpuSets() ([]uint64, error) {
	current, err := currentCgroup()
	if err == nil {
		return current.cpus()
	}

	return nil, err
}
// perCpuUsage returns the per-CPU usage values (cpuacct.usage_percpu) of the
// current cgroup.
func perCpuUsage() ([]uint64, error) {
	current, err := currentCgroup()
	if err == nil {
		return current.acctUsagePerCpu()
	}

	return nil, err
}
func systemCpuUsage() (uint64, error) {
lines, err := iox.ReadTextLines("/proc/stat", iox.WithoutBlank())
if err != nil {
return 0, err
}
for _, line := range lines {
fields := strings.Fields(line)
if fields[0] == "cpu" {
if len(fields) < cpuFields {
return 0, fmt.Errorf("bad format of cpu stats")
}
var totalClockTicks uint64
for _, i := range fields[1:cpuFields] {
v, err := parseUint(i)
if err != nil {
return 0, err
}
totalClockTicks += v
}
return (totalClockTicks * uint64(time.Second)) / cpuTicks, nil
}
}
return 0, errors.New("bad stats format")
}
func totalCpuUsage() (usage uint64, err error) {
var cg *cgroup
if cg, err = currentCgroup(); err != nil {
return
}
return cg.acctUsageAllCpus()
}

View File

@@ -0,0 +1,9 @@
package internal
import "testing"
// BenchmarkRefreshCpu measures the cost of one RefreshCpu sample.
func BenchmarkRefreshCpu(b *testing.B) {
	for remaining := b.N; remaining > 0; remaining-- {
		RefreshCpu()
	}
}

View File

@@ -0,0 +1,7 @@
// +build !linux
package internal
// RefreshCpu always reports zero usage on platforms other than linux, where
// no CPU sampling is implemented.
func RefreshCpu() uint64 {
	const unsupported uint64 = 0
	return unsupported
}

210
core/stat/metrics.go Normal file
View File

@@ -0,0 +1,210 @@
package stat
import (
"os"
"sync"
"time"
"zero/core/executors"
"zero/core/logx"
)
var (
// LogInterval is the flush interval handed to the periodical executor by
// NewMetrics; Execute also uses it as the time base for requests per second.
LogInterval = time.Minute
// writerLock guards reportWriter.
writerLock sync.Mutex
// reportWriter receives every report when set; nil means reports are only logged.
reportWriter Writer = nil
)
type (
// Writer is a destination for statistics reports.
Writer interface {
Write(report *StatReport) error
}
// StatReport is the summary of one reporting interval. The duration fields
// are filled in milliseconds by Execute.
StatReport struct {
Name string `json:"name"`
Timestamp int64 `json:"tm"`
Pid int `json:"pid"`
ReqsPerSecond float32 `json:"qps"`
Drops int `json:"drops"`
Average float32 `json:"avg"`
Median float32 `json:"med"`
Top90th float32 `json:"t90"`
Top99th float32 `json:"t99"`
Top99p9th float32 `json:"t99p9"`
}
// Metrics collects tasks and periodically reports statistics about them.
Metrics struct {
// executor batches the added tasks and flushes them into container.
executor *executors.PeriodicalExecutor
// container accumulates the tasks of the current interval.
container *metricsContainer
}
)
// SetReportWriter installs the Writer that receives every subsequent report.
// Passing nil stops forwarding reports to a writer.
func SetReportWriter(writer Writer) {
	writerLock.Lock()
	defer writerLock.Unlock()

	reportWriter = writer
}
// NewMetrics creates a Metrics labelled with name. Its container records the
// current process id and is flushed by a periodical executor configured with
// LogInterval.
func NewMetrics(name string) *Metrics {
	m := new(Metrics)
	m.container = &metricsContainer{name: name, pid: os.Getpid()}
	m.executor = executors.NewPeriodicalExecutor(LogInterval, m.container)

	return m
}
// Add hands task to the executor so that it is included in the next report.
func (m *Metrics) Add(task Task) {
	exec := m.executor
	exec.Add(task)
}
// AddDrop records one dropped request by submitting a Task whose Drop flag
// is set.
func (m *Metrics) AddDrop() {
	var dropped Task
	dropped.Drop = true
	m.executor.Add(dropped)
}
// SetName changes the name under which reports are published. The assignment
// is run through executor.Sync (presumably so that it is serialised with the
// executor's own work; confirm against executors.PeriodicalExecutor).
func (m *Metrics) SetName(name string) {
	rename := func() {
		m.container.name = name
	}
	m.executor.Sync(rename)
}
type (
// tasksDurationPair is the batch handed from RemoveAll to Execute: the
// collected tasks, the sum of their durations and the number of drops.
tasksDurationPair struct {
tasks []Task
duration time.Duration
drops int
}
// metricsContainer accumulates the tasks of the current interval.
metricsContainer struct {
// name and pid are copied into every report.
name string
pid int
// tasks holds the non-drop tasks added since the last RemoveAll.
tasks []Task
// duration is the sum of the durations of tasks.
duration time.Duration
// drops counts the tasks added with Drop set since the last RemoveAll.
drops int
}
)
// AddTask records v in the container. Values that are not a Task are
// ignored; a Task with Drop set only increments the drop counter, any other
// Task is stored and its duration added to the running total. It always
// returns false.
func (c *metricsContainer) AddTask(v interface{}) bool {
	task, isTask := v.(Task)
	switch {
	case !isTask:
		// Not a Task: nothing to record.
	case task.Drop:
		c.drops++
	default:
		c.tasks = append(c.tasks, task)
		c.duration += task.Duration
	}

	return false
}
// Execute turns one flushed batch (the tasksDurationPair produced by
// RemoveAll) into a StatReport and passes it to log.
//
// Requests per second is the number of tasks divided by LogInterval in
// seconds. When the batch is not empty, the average is the summed duration
// divided by the task count, and each percentile is the smallest duration
// among the slowest N tasks, with N = size/2 for the median, N/5 for the
// 90th, a further /10 for the 99th and another /10 for the 99.9th percentile.
// Whenever such an N becomes zero, the remaining percentiles fall back to the
// single slowest task of the set considered so far. All durations are
// reported in milliseconds.
//
// v must be a tasksDurationPair; any other type makes the assertion panic.
func (c *metricsContainer) Execute(v interface{}) {
	pair := v.(tasksDurationPair)
	tasks := pair.tasks
	size := len(tasks)
	report := &StatReport{
		Name:          c.name,
		Timestamp:     time.Now().Unix(),
		Pid:           c.pid,
		ReqsPerSecond: float32(size) / float32(LogInterval/time.Second),
		Drops:         pair.drops,
	}

	if size > 0 {
		// Convert to milliseconds in floating point. An integer division of
		// the summed duration would truncate it to whole milliseconds first
		// and lose the sub-millisecond part that the other figures keep.
		report.Average = float32(float64(pair.duration) / float64(time.Millisecond) / float64(size))

		fiftyPercent := size >> 1
		if fiftyPercent > 0 {
			top50pTasks := topK(tasks, fiftyPercent)
			medianTask := top50pTasks[0]
			report.Median = float32(medianTask.Duration) / float32(time.Millisecond)

			tenPercent := fiftyPercent / 5
			if tenPercent > 0 {
				top10pTasks := topK(tasks, tenPercent)
				task90th := top10pTasks[0]
				report.Top90th = float32(task90th.Duration) / float32(time.Millisecond)

				onePercent := tenPercent / 10
				if onePercent > 0 {
					// From here on the candidates are narrowed down from the
					// previous top set instead of rescanning all tasks.
					top1pTasks := topK(top10pTasks, onePercent)
					task99th := top1pTasks[0]
					report.Top99th = float32(task99th.Duration) / float32(time.Millisecond)

					pointOnePercent := onePercent / 10
					if pointOnePercent > 0 {
						topPointOneTasks := topK(top1pTasks, pointOnePercent)
						task99Point9th := topPointOneTasks[0]
						report.Top99p9th = float32(task99Point9th.Duration) / float32(time.Millisecond)
					} else {
						report.Top99p9th = getTopDuration(top1pTasks)
					}
				} else {
					mostDuration := getTopDuration(top10pTasks)
					report.Top99th = mostDuration
					report.Top99p9th = mostDuration
				}
			} else {
				mostDuration := getTopDuration(tasks)
				report.Top90th = mostDuration
				report.Top99th = mostDuration
				report.Top99p9th = mostDuration
			}
		} else {
			// A single task: every percentile is that task's duration.
			mostDuration := getTopDuration(tasks)
			report.Median = mostDuration
			report.Top90th = mostDuration
			report.Top99th = mostDuration
			report.Top99p9th = mostDuration
		}
	}

	log(report)
}
// RemoveAll returns everything collected so far as a tasksDurationPair and
// resets the container for the next interval. The name and pid are kept.
func (c *metricsContainer) RemoveAll() interface{} {
	batch := tasksDurationPair{
		tasks:    c.tasks,
		duration: c.duration,
		drops:    c.drops,
	}
	c.tasks, c.duration, c.drops = nil, 0, 0

	return batch
}
// getTopDuration returns the duration of the slowest task in milliseconds,
// or 0 when tasks is empty.
func getTopDuration(tasks []Task) float32 {
	if slowest := topK(tasks, 1); len(slowest) > 0 {
		return float32(slowest[0].Duration) / float32(time.Millisecond)
	}

	return 0
}
// log forwards report to the configured writer (if any) and then emits a
// one-line summary through logx.Statf.
func log(report *StatReport) {
	const summary = "(%s) - qps: %.1f/s, drops: %d, avg time: %.1fms, med: %.1fms, 90th: %.1fms, 99th: %.1fms, 99.9th: %.1fms"

	writeReport(report)
	logx.Statf(
		summary,
		report.Name,
		report.ReqsPerSecond,
		report.Drops,
		report.Average,
		report.Median,
		report.Top90th,
		report.Top99th,
		report.Top99p9th,
	)
}
// writeReport passes report to the writer installed with SetReportWriter.
// It does nothing when no writer is set; a write error is logged, not
// returned. The writer lock is held for the duration of the write.
func writeReport(report *StatReport) {
	writerLock.Lock()
	defer writerLock.Unlock()

	if reportWriter == nil {
		return
	}

	err := reportWriter.Write(report)
	if err != nil {
		logx.Error(err)
	}
}

48
core/stat/remotewriter.go Normal file
View File

@@ -0,0 +1,48 @@
package stat
import (
"bytes"
"encoding/json"
"errors"
"net/http"
"time"
"zero/core/logx"
)
// httpTimeout bounds every HTTP request issued by RemoteWriter.Write.
const httpTimeout = time.Second * 5
// ErrWriteFailed is returned by RemoteWriter.Write when the endpoint answers
// with a status other than 200 OK.
var ErrWriteFailed = errors.New("submit failed")
// RemoteWriter is a Writer that posts reports as JSON to an HTTP endpoint.
type RemoteWriter struct {
// endpoint is the URL the reports are posted to.
endpoint string
}
// NewRemoteWriter returns a Writer that posts every report to endpoint.
func NewRemoteWriter(endpoint string) Writer {
	rw := new(RemoteWriter)
	rw.endpoint = endpoint

	return rw
}
// Write encodes report as JSON and posts it to the writer's endpoint with
// content type "application/json", using a client limited by httpTimeout.
//
// It returns the marshalling or transport error as is. A response status
// other than 200 OK is logged and reported as ErrWriteFailed.
func (rw *RemoteWriter) Write(report *StatReport) error {
	payload, err := json.Marshal(report)
	if err != nil {
		return err
	}

	httpClient := http.Client{Timeout: httpTimeout}
	resp, err := httpClient.Post(rw.endpoint, "application/json", bytes.NewBuffer(payload))
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode == http.StatusOK {
		return nil
	}

	logx.Errorf("write report failed, code: %d, reason: %s", resp.StatusCode, resp.Status)
	return ErrWriteFailed
}

9
core/stat/task.go Normal file
View File

@@ -0,0 +1,9 @@
package stat
import "time"
// Task describes one unit of work submitted to Metrics.
type Task struct {
// Drop marks the task as a dropped request: it is counted as a drop and
// excluded from the duration statistics.
Drop bool
// Duration is how long the task took.
Duration time.Duration
// Description is free-form text attached to the task.
Description string
}

45
core/stat/topk.go Normal file
View File

@@ -0,0 +1,45 @@
package stat
import "container/heap"
// taskHeap is a min-heap of tasks ordered by Duration; it implements
// heap.Interface so that the shortest task is always at index 0.
type taskHeap []Task

// Len returns the number of tasks in the heap.
func (h *taskHeap) Len() int {
	return len(*h)
}

// Less orders tasks by ascending duration.
func (h *taskHeap) Less(i, j int) bool {
	return (*h)[i].Duration < (*h)[j].Duration
}

// Swap exchanges the tasks at indexes i and j.
func (h *taskHeap) Swap(i, j int) {
	(*h)[i], (*h)[j] = (*h)[j], (*h)[i]
}

// Push appends x, which must be a Task; it is meant to be called by
// container/heap only.
func (h *taskHeap) Push(x interface{}) {
	*h = append(*h, x.(Task))
}

// Pop removes and returns the last element; it is meant to be called by
// container/heap only.
func (h *taskHeap) Pop() interface{} {
	old := *h
	n := len(old)
	x := old[n-1]
	*h = old[0 : n-1]
	return x
}

// topK returns the k tasks with the longest durations out of all, or all of
// them when there are fewer than k. The result is laid out as a min-heap, so
// element 0 is the shortest of the returned tasks, i.e. the k-th longest
// overall. A non-positive k or an empty input yields an empty result.
func topK(all []Task, k int) []Task {
	// Without this guard a non-positive k would skip the fill branch and
	// index the root of an empty heap.
	if k <= 0 {
		return nil
	}

	h := new(taskHeap)
	heap.Init(h)

	for _, each := range all {
		if h.Len() < k {
			heap.Push(h, each)
		} else if (*h)[0].Duration < each.Duration {
			// Replace the current minimum in place and restore the heap
			// order; equivalent to Pop followed by Push but cheaper.
			(*h)[0] = each
			heap.Fix(h, 0)
		}
	}

	return *h
}

62
core/stat/topk_test.go Normal file
View File

@@ -0,0 +1,62 @@
package stat
import (
"math/rand"
"testing"
"time"
"github.com/stretchr/testify/assert"
)
const (
// numSamples is the number of random tasks generated for the benchmark.
numSamples = 10000
// topNum is the k passed to topK by the benchmark.
topNum = 100
)
// samples is the benchmark input, filled once by init.
var samples []Task
// init fills samples with numSamples tasks of random duration.
func init() {
	for remaining := numSamples; remaining > 0; remaining-- {
		samples = append(samples, Task{
			Duration: time.Duration(rand.Int63()),
		})
	}
}
// TestTopK checks that topK with k=3 returns exactly three tasks and that
// they are the ones with the three longest durations.
func TestTopK(t *testing.T) {
	durations := []time.Duration{1, 4, 2, 5, 9, 10, 12, 3, 6, 11, 8}
	tasks := make([]Task, 0, len(durations))
	for _, d := range durations {
		tasks = append(tasks, Task{Drop: false, Duration: d, Description: "a"})
	}

	top := topK(tasks, 3)
	if len(top) != 3 {
		t.Fail()
	}

	// The order of the result is not part of the contract, so compare as a set.
	seen := make(map[time.Duration]struct{})
	for _, task := range top {
		seen[task.Duration] = struct{}{}
	}

	for _, want := range []time.Duration{10, 11, 12} {
		_, found := seen[want]
		assert.True(t, found)
	}
}
// BenchmarkTopkHeap measures topK selecting topNum tasks out of samples.
func BenchmarkTopkHeap(b *testing.B) {
	for remaining := b.N; remaining > 0; remaining-- {
		topK(samples, topNum)
	}
}

60
core/stat/usage.go Normal file
View File

@@ -0,0 +1,60 @@
package stat
import (
"runtime"
"sync/atomic"
"time"
"zero/core/logx"
"zero/core/stat/internal"
"zero/core/threading"
)
const (
// cpuRefreshInterval is how often the CPU usage is sampled. With a 250ms
// interval and a beta of 0.95 the moving average reflects roughly the CPU
// load of the past 5 seconds.
cpuRefreshInterval = time.Millisecond * 250
// allRefreshInterval is how often the CPU and memory statistics are logged.
allRefreshInterval = time.Minute
// beta is the weight of the previous value in the moving average.
beta = 0.95
)
// cpuUsage holds the smoothed CPU usage; it is only accessed through sync/atomic.
var cpuUsage int64
// init starts a background goroutine that never terminates. Every
// cpuRefreshInterval it folds a fresh CPU sample into cpuUsage as an
// exponential moving average, and every allRefreshInterval it logs the
// current CPU and memory statistics.
func init() {
	go func() {
		sampleTicker := time.NewTicker(cpuRefreshInterval)
		defer sampleTicker.Stop()
		logTicker := time.NewTicker(allRefreshInterval)
		defer logTicker.Stop()

		// refresh runs under threading.RunSafe (presumably shielding the loop
		// from a panic in the sampling code; confirm against the threading
		// package).
		refresh := func() {
			curUsage := internal.RefreshCpu()
			prevUsage := atomic.LoadInt64(&cpuUsage)
			// cpu = cpuᵗ⁻¹ * beta + cpuᵗ * (1 - beta)
			smoothed := int64(float64(prevUsage)*beta + float64(curUsage)*(1-beta))
			atomic.StoreInt64(&cpuUsage, smoothed)
		}

		for {
			select {
			case <-sampleTicker.C:
				threading.RunSafe(refresh)
			case <-logTicker.C:
				printUsage()
			}
		}
	}()
}
// CpuUsage returns the current smoothed CPU usage maintained by the
// background sampler.
func CpuUsage() int64 {
	current := atomic.LoadInt64(&cpuUsage)
	return current
}
// bToMb converts a byte count to mebibytes (1 MiB = 1024 * 1024 bytes).
func bToMb(b uint64) float32 {
	const unit = 1024

	mb := float32(b)
	mb /= unit // bytes -> KiB
	mb /= unit // KiB -> MiB

	return mb
}
// printUsage logs the smoothed CPU usage together with the Go runtime's
// memory statistics (Alloc, TotalAlloc and Sys in MiB, plus the number of
// completed GC cycles).
func printUsage() {
	var stats runtime.MemStats
	runtime.ReadMemStats(&stats)

	logx.Statf(
		"CPU: %dm, MEMORY: Alloc=%.1fMi, TotalAlloc=%.1fMi, Sys=%.1fMi, NumGC=%d",
		CpuUsage(),
		bToMb(stats.Alloc),
		bToMb(stats.TotalAlloc),
		bToMb(stats.Sys),
		stats.NumGC,
	)
}