
Merge pull request #215 from DataDog/shang/fix-chunk-bug
[proc] put containers and processes running in containers in the same chunk
shang-wang authored Nov 30, 2018
2 parents 6fe361f + 110630f commit 9afcd7c
Showing 5 changed files with 405 additions and 70 deletions.
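
The gist of the change: previously the process check chunked processes and containers independently, so a container and the processes running inside it could land in different payloads. After this PR, fmtProcesses groups processes by container ID, processes that do not belong to any container are chunked by cfg.MaxPerMessage, and every container-bound process ships in the same message as the container list. The sketch below illustrates that grouping with simplified, hypothetical types; it is not the agent's model package, just a minimal standalone Go program.

package main

import "fmt"

// Hypothetical, simplified stand-ins for the agent's payload types.
type proc struct {
	pid         int32
	containerID string // "" for host (non-container) processes
}

type message struct {
	procs      []proc
	containers []string // IDs of the containers carried by this payload
}

// buildMessages sketches the new strategy: host processes are split into
// chunks of at most maxPerMessage, while every process that runs in a
// container travels in the same message as the container list.
func buildMessages(procs []proc, ctrIDs []string, maxPerMessage int) []message {
	byCtr := make(map[string][]proc)
	for _, p := range procs {
		byCtr[p.containerID] = append(byCtr[p.containerID], p)
	}

	var msgs []message
	host := byCtr[""]
	for i := 0; i < len(host); i += maxPerMessage {
		end := i + maxPerMessage
		if end > len(host) {
			end = len(host)
		}
		msgs = append(msgs, message{procs: host[i:end]})
	}

	var ctrProcs []proc
	for _, id := range ctrIDs {
		ctrProcs = append(ctrProcs, byCtr[id]...)
	}
	if len(ctrIDs) > 0 {
		msgs = append(msgs, message{procs: ctrProcs, containers: ctrIDs})
	}
	return msgs
}

func main() {
	procs := []proc{{1, ""}, {2, ""}, {3, "web"}, {4, "web"}, {5, "db"}}
	for i, m := range buildMessages(procs, []string{"web", "db"}, 2) {
		fmt.Printf("msg %d: %d processes, containers %v\n", i, len(m.procs), m.containers)
	}
}

With two host processes, a per-message cap of 2 and two containers, this prints one host-only message of two processes and one message carrying both containers plus their three processes.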
33 changes: 20 additions & 13 deletions checks/container.go
@@ -58,10 +58,10 @@ func (c *ContainerCheck) Run(cfg *config.AgentConfig, groupID int32) ([]model.Me
 	}
 
 	groupSize := len(ctrList) / cfg.MaxPerMessage
-	if len(ctrList) != cfg.MaxPerMessage {
+	if len(ctrList) != cfg.MaxPerMessage*groupSize {
 		groupSize++
 	}
-	chunked := fmtContainers(ctrList, c.lastRates, c.lastRun, groupSize)
+	chunked := chunkContainers(ctrList, c.lastRates, c.lastRun, groupSize, cfg.MaxPerMessage)
 	messages := make([]model.MessageBody, 0, groupSize)
 	totalContainers := float64(0)
 	for i := 0; i < groupSize; i++ {
@@ -83,13 +83,9 @@ func (c *ContainerCheck) Run(cfg *config.AgentConfig, groupID int32) ([]model.Me
 	return messages, nil
 }
 
-// fmtContainers formats and chunks the ctrList into a slice of chunks using a specific
-// number of chunks. len(result) MUST EQUAL chunks.
-func fmtContainers(ctrList []*containers.Container, lastRates map[string]util.ContainerRateMetrics, lastRun time.Time, chunks int) [][]*model.Container {
-	perChunk := (len(ctrList) / chunks) + 1
-	chunked := make([][]*model.Container, chunks)
-	chunk := make([]*model.Container, 0, perChunk)
-	i := 0
+// fmtContainers loops through container list and converts them to a list of container objects
+func fmtContainers(ctrList []*containers.Container, lastRates map[string]util.ContainerRateMetrics, lastRun time.Time) []*model.Container {
+	containers := make([]*model.Container, 0, len(ctrList))
 	for _, ctr := range ctrList {
 		lastCtr, ok := lastRates[ctr.ID]
 		if !ok {
@@ -112,7 +108,7 @@ func fmtContainers(ctrList []*containers.Container, lastRates map[string]util.Co
 			tags = []string{}
 		}
 
-		chunk = append(chunk, &model.Container{
+		containers = append(containers, &model.Container{
 			Id: ctr.ID,
 			Type: ctr.Type,
 			CpuLimit: float32(ctr.CPULimit),
@@ -135,15 +131,26 @@ func fmtContainers(ctrList []*containers.Container, lastRates map[string]util.Co
 			Started: ctr.StartedAt,
 			Tags: tags,
 		})
+	}
+	return containers
+}
+
+// chunkContainers formats and chunks the ctrList into a slice of chunks using a specific number of chunks.
+func chunkContainers(ctrList []*containers.Container, lastRates map[string]util.ContainerRateMetrics, lastRun time.Time, chunks, perChunk int) [][]*model.Container {
+	chunked := make([][]*model.Container, 0, chunks)
+	chunk := make([]*model.Container, 0, perChunk)
+
+	containers := fmtContainers(ctrList, lastRates, lastRun)
+
+	for _, ctr := range containers {
+		chunk = append(chunk, ctr)
 		if len(chunk) == perChunk {
-			chunked[i] = chunk
+			chunked = append(chunked, chunk)
 			chunk = make([]*model.Container, 0, perChunk)
-			i++
 		}
 	}
 	if len(chunk) > 0 {
-		chunked[i] = chunk
+		chunked = append(chunked, chunk)
 	}
 	return chunked
 }
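
Two things change in checks/container.go. First, Run computes the number of groups with a true ceiling division: the old condition (len(ctrList) != cfg.MaxPerMessage) bumped groupSize whenever the container count merely differed from MaxPerMessage, while the new condition (len(ctrList) != cfg.MaxPerMessage*groupSize) only bumps it when there is a remainder. Second, formatting and chunking are now separate steps: fmtContainers just converts containers, and chunkContainers fills chunks up to perChunk and appends them, so the number of returned chunks follows from the data instead of having to match a pre-sized slice indexed by a counter. A standalone sketch of the same pattern follows; the names ceilDiv and chunkStrings are illustrative, not part of the agent.

package main

import "fmt"

// ceilDiv mirrors the corrected group-count logic in Run above.
func ceilDiv(n, per int) int {
	groups := n / per
	if n != per*groups {
		groups++
	}
	return groups
}

// chunkStrings mirrors chunkContainers: fill a chunk up to per items,
// append it, and keep whatever is left over as a final short chunk.
func chunkStrings(items []string, per int) [][]string {
	chunks := make([][]string, 0, ceilDiv(len(items), per))
	chunk := make([]string, 0, per)
	for _, it := range items {
		chunk = append(chunk, it)
		if len(chunk) == per {
			chunks = append(chunks, chunk)
			chunk = make([]string, 0, per)
		}
	}
	if len(chunk) > 0 {
		chunks = append(chunks, chunk)
	}
	return chunks
}

func main() {
	fmt.Println(chunkStrings([]string{"a", "b", "c", "d", "e"}, 2)) // [[a b] [c d] [e]]
}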
13 changes: 6 additions & 7 deletions checks/container_nolinux.go
@@ -42,13 +42,12 @@ func (c *ContainerCheck) Run(cfg *config.AgentConfig, groupID int32) ([]model.Me
 	return nil, nil
 }
 
-// fmtContainers formats and chunks the containers into a slice of chunks using a specific
+// chunkContainers formats and chunks the containers into a slice of chunks using a specific
 // number of chunks. len(result) MUST EQUAL chunks.
-func fmtContainers(
-	ctrList []*containers.Container,
-	lastRates map[string]util.ContainerRateMetrics,
-	lastRun time.Time,
-	chunks int,
-) [][]*model.Container {
+func chunkContainers(ctrList []*containers.Container, lastRates map[string]util.ContainerRateMetrics, lastRun time.Time, chunks, perChunk int) [][]*model.Container {
 	return make([][]*model.Container, chunks)
 }
+
+func fmtContainers(ctrList []*containers.Container, lastRates map[string]util.ContainerRateMetrics, lastRun time.Time) []*model.Container {
+	return make([]*model.Container, 0)
+}
20 changes: 12 additions & 8 deletions checks/container_test.go
@@ -35,27 +35,31 @@ func TestContainerChunking(t *testing.T) {
 		last map[string]util.ContainerRateMetrics
 		chunks int
 		expected int
+		maxSize int
 	}{
 		{
 			cur: []*containers.Container{ctrs[0], ctrs[1], ctrs[2]},
 			last: util.ExtractContainerRateMetric([]*containers.Container{ctrs[0], ctrs[1], ctrs[2]}),
 			chunks: 2,
 			expected: 3,
+			maxSize: 2,
 		},
 		{
 			cur: []*containers.Container{ctrs[0], ctrs[1], ctrs[2]},
 			last: util.ExtractContainerRateMetric([]*containers.Container{ctrs[0], ctrs[2]}),
 			chunks: 2,
 			expected: 3,
+			maxSize: 2,
 		},
 		{
 			cur: []*containers.Container{ctrs[0], ctrs[2]},
 			last: util.ExtractContainerRateMetric([]*containers.Container{ctrs[0], ctrs[1], ctrs[2]}),
-			chunks: 20,
+			chunks: 2,
 			expected: 2,
+			maxSize: 1,
 		},
 	} {
-		chunked := fmtContainers(tc.cur, tc.last, lastRun, tc.chunks)
+		chunked := chunkContainers(tc.cur, tc.last, lastRun, tc.chunks, tc.maxSize)
 		assert.Len(t, chunked, tc.chunks, "len test %d", i)
 		total := 0
 		for _, c := range chunked {
@@ -74,22 +78,22 @@
 	}
 }
 
-func TestContainerAddressList(t *testing.T) {
+func TestContainerAddresses(t *testing.T) {
 	ctr := makeContainer("haha")
 	ctr.AddressList = []containers.NetworkAddress{containers.NetworkAddress{IP: net.ParseIP("192.168.128.141"), Port: 443, Protocol: "TCP"}}
-	results := fmtContainers([]*containers.Container{ctr}, map[string]util.ContainerRateMetrics{}, time.Now(), 1)
-	assert.Equal(t, 1, len(results[0]))
+	results := fmtContainers([]*containers.Container{ctr}, map[string]util.ContainerRateMetrics{}, time.Now())
+	assert.Equal(t, 1, len(results))
 	addrs := []*model.ContainerAddr{
 		&model.ContainerAddr{Ip: "192.168.128.141", Port: int32(443), Protocol: model.ConnectionType_tcp},
 	}
-	assert.Equal(t, results[0][0].Addresses, addrs)
+	assert.Equal(t, results[0].Addresses, addrs)
 }
 
 func TestContainerNils(t *testing.T) {
 	// Make sure formatting doesn't crash with nils
 	cur := []*containers.Container{&containers.Container{}}
 	last := map[string]util.ContainerRateMetrics{}
-	fmtContainers(cur, last, time.Now(), 10)
+	chunkContainers(cur, last, time.Now(), 10, 10)
 	fmtContainerStats(cur, last, time.Now(), 10)
 	// Make sure we get values when we have nils in last.
 	cur = []*containers.Container{
@@ -103,7 +107,7 @@ func TestContainerNils(t *testing.T) {
 			CPU: &metrics.CgroupTimesStat{},
 		},
 	}
-	fmtContainers(cur, last, time.Now(), 10)
+	chunkContainers(cur, last, time.Now(), 10, 10)
 	fmtContainerStats(cur, last, time.Now(), 10)
 }
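
The updated test table now pins down both dimensions of the chunking: chunks is the expected number of returned chunks and maxSize the per-chunk cap, so three containers with maxSize 2 must come back as two chunks of sizes 2 and 1, and two containers with maxSize 1 as two chunks of one each. A quick standalone check of that arithmetic (illustrative only, not part of the test file):

package main

import "fmt"

// chunkSizes reports how many containers land in each chunk for a given
// per-chunk cap, matching the expectations encoded in TestContainerChunking.
func chunkSizes(total, maxSize int) []int {
	var sizes []int
	for total > 0 {
		n := maxSize
		if total < n {
			n = total
		}
		sizes = append(sizes, n)
		total -= n
	}
	return sizes
}

func main() {
	fmt.Println(chunkSizes(3, 2)) // [2 1]: 2 chunks, 3 containers in total
	fmt.Println(chunkSizes(2, 1)) // [1 1]: 2 chunks, 2 containers in total
}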
130 changes: 94 additions & 36 deletions checks/process.go
@@ -15,6 +15,8 @@ import (
 	"github.com/DataDog/datadog-process-agent/util"
 )
 
+const emptyCtrID = ""
+
 // Process is a singleton ProcessCheck.
 var Process = &ProcessCheck{}
 
@@ -76,28 +78,10 @@ func (p *ProcessCheck) Run(cfg *config.AgentConfig, groupID int32) ([]model.Mess
 		return nil, nil
 	}
 
-	chunkedProcs := fmtProcesses(cfg, procs, p.lastProcs,
-		ctrList, cpuTimes[0], p.lastCPUTime, p.lastRun)
-	// In case we skip every process..
-	if len(chunkedProcs) == 0 {
-		return nil, nil
-	}
-	groupSize := len(chunkedProcs)
-	chunkedContainers := fmtContainers(ctrList, p.lastCtrRates, p.lastRun, groupSize)
-	messages := make([]model.MessageBody, 0, groupSize)
-	totalProcs, totalContainers := float64(0), float64(0)
-	for i := 0; i < groupSize; i++ {
-		totalProcs += float64(len(chunkedProcs[i]))
-		totalContainers += float64(len(chunkedContainers[i]))
-		messages = append(messages, &model.CollectorProc{
-			HostName: cfg.HostName,
-			Info: p.sysInfo,
-			Processes: chunkedProcs[i],
-			Containers: chunkedContainers[i],
-			GroupId: groupID,
-			GroupSize: int32(groupSize),
-		})
-	}
+	procsByCtr := fmtProcesses(cfg, procs, p.lastProcs, ctrList, cpuTimes[0], p.lastCPUTime, p.lastRun)
+	containers := fmtContainers(ctrList, p.lastCtrRates, p.lastRun)
+
+	messages, totalProcs, totalContainers := createProcCtrMessages(procsByCtr, containers, cfg, p.sysInfo, groupID)
+
 	// Store the last state for comparison on the next run.
 	// Note: not storing the filtered in case there are new processes that haven't had a chance to show up twice.
@@ -106,28 +90,102 @@ func (p *ProcessCheck) Run(cfg *config.AgentConfig, groupID int32) ([]model.Mess
 	p.lastCPUTime = cpuTimes[0]
 	p.lastRun = time.Now()
 
-	statsd.Client.Gauge("datadog.process.containers.host_count", totalContainers, []string{}, 1)
-	statsd.Client.Gauge("datadog.process.processes.host_count", totalProcs, []string{}, 1)
+	statsd.Client.Gauge("datadog.process.containers.host_count", float64(totalContainers), []string{}, 1)
+	statsd.Client.Gauge("datadog.process.processes.host_count", float64(totalProcs), []string{}, 1)
 	log.Debugf("collected processes in %s", time.Now().Sub(start))
 	return messages, nil
 }
 
+func createProcCtrMessages(
+	procsByCtr map[string][]*model.Process,
+	containers []*model.Container,
+	cfg *config.AgentConfig,
+	sysInfo *model.SystemInfo,
+	groupID int32,
+) ([]model.MessageBody, int, int) {
+	totalProcs, totalContainers := 0, 0
+	msgs := make([]*model.CollectorProc, 0)
+
+	// we first split non-container processes in chunks
+	chunks := chunkProcesses(procsByCtr[emptyCtrID], cfg.MaxPerMessage)
+	for _, c := range chunks {
+		msgs = append(msgs, &model.CollectorProc{
+			HostName: cfg.HostName,
+			Info: sysInfo,
+			Processes: c,
+			GroupId: groupID,
+		})
+	}
+
+	ctrProcs := make([]*model.Process, 0)
+	ctrs := make([]*model.Container, 0, len(containers))
+	for _, ctr := range containers {
+		if procs, ok := procsByCtr[ctr.Id]; ok {
+			ctrProcs = append(ctrProcs, procs...)
+		}
+		ctrs = append(ctrs, ctr)
+	}
+
+	if len(ctrs) > 0 {
+		msgs = append(msgs, &model.CollectorProc{
+			HostName: cfg.HostName,
+			Info: sysInfo,
+			Processes: ctrProcs,
+			Containers: ctrs,
+			GroupId: groupID,
+		})
+	}
+
+	// fill in GroupSize for each CollectorProc and convert them to final messages
+	// also count containers and processes
+	messages := make([]model.MessageBody, 0, len(msgs))
+	for _, m := range msgs {
+		m.GroupSize = int32(len(msgs))
+		messages = append(messages, m)
+		totalProcs += len(m.Processes)
+		totalContainers += len(m.Containers)
+	}
+
+	return messages, totalProcs, totalContainers
+}
+
+// chunkProcesses split non-container processes into chunks and return a list of chunks
+func chunkProcesses(procs []*model.Process, size int) [][]*model.Process {
+	chunkCount := len(procs) / size
+	if chunkCount*size < len(procs) {
+		chunkCount++
+	}
+	chunks := make([][]*model.Process, 0, chunkCount)
+
+	for i := 0; i < len(procs); i += size {
+		end := i + size
+		if end > len(procs) {
+			end = len(procs)
+		}
+		chunks = append(chunks, procs[i:end])
+	}
+
+	return chunks
+}
+
+// fmtProcesses goes through each process, converts them to process object and group them by containers
+// non-container processes would be in a single group with key as empty string ""
 func fmtProcesses(
 	cfg *config.AgentConfig,
 	procs, lastProcs map[int32]*process.FilledProcess,
 	ctrList []*containers.Container,
 	syst2, syst1 cpu.TimesStat,
 	lastRun time.Time,
-) [][]*model.Process {
+) map[string][]*model.Process {
 	cidByPid := make(map[int32]string, len(ctrList))
 	for _, c := range ctrList {
 		for _, p := range c.Pids {
 			cidByPid[p] = c.ID
 		}
 	}
 
-	chunked := make([][]*model.Process, 0)
-	chunk := make([]*model.Process, 0, cfg.MaxPerMessage)
+	procsByCtr := make(map[string][]*model.Process)
+
 	for _, fp := range procs {
 		if skipProcess(cfg, fp, lastProcs) {
 			continue
@@ -136,7 +194,7 @@ func fmtProcesses(
 		// Hide blacklisted args if the Scrubber is enabled
 		fp.Cmdline = cfg.Scrubber.ScrubProcessCommand(fp)
 
-		chunk = append(chunk, &model.Process{
+		proc := &model.Process{
 			Pid: fp.Pid,
 			Command: formatCommand(fp),
 			User: formatUser(fp),
@@ -149,17 +207,17 @@ func fmtProcesses(
 			VoluntaryCtxSwitches: uint64(fp.CtxSwitches.Voluntary),
 			InvoluntaryCtxSwitches: uint64(fp.CtxSwitches.Involuntary),
 			ContainerId: cidByPid[fp.Pid],
-		})
-		if len(chunk) == cfg.MaxPerMessage {
-			chunked = append(chunked, chunk)
-			chunk = make([]*model.Process, 0, cfg.MaxPerMessage)
 		}
+		_, ok := procsByCtr[proc.ContainerId]
+		if !ok {
+			procsByCtr[proc.ContainerId] = make([]*model.Process, 0)
+		}
+		procsByCtr[proc.ContainerId] = append(procsByCtr[proc.ContainerId], proc)
 	}
-	if len(chunk) > 0 {
-		chunked = append(chunked, chunk)
-	}
 
 	cfg.Scrubber.IncrementCacheAge()
-	return chunked
+
+	return procsByCtr
 }
 
 func formatCommand(fp *process.FilledProcess) *model.Command {
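
One detail worth calling out in createProcCtrMessages: GroupSize can only be filled in after every payload has been assembled, because the group now consists of however many host-process chunks chunkProcesses produced plus, at most, one combined message that carries all containers and all container-bound processes, which is exactly what the PR title describes. A minimal sketch of that back-fill pattern, using hypothetical types in place of model.CollectorProc:

package main

import "fmt"

// collectorProc is a hypothetical stand-in for model.CollectorProc, reduced
// to the fields that matter for the group-size bookkeeping.
type collectorProc struct {
	processes  int
	containers int
	groupSize  int32
}

// assemble builds one message per host-process chunk plus one message for all
// containers and their processes, then back-fills groupSize once the total
// number of messages is known.
func assemble(hostChunks []int, ctrProcs, ctrCount int) []collectorProc {
	msgs := make([]collectorProc, 0, len(hostChunks)+1)
	for _, n := range hostChunks {
		msgs = append(msgs, collectorProc{processes: n})
	}
	if ctrCount > 0 {
		msgs = append(msgs, collectorProc{processes: ctrProcs, containers: ctrCount})
	}
	for i := range msgs {
		msgs[i].groupSize = int32(len(msgs))
	}
	return msgs
}

func main() {
	for _, m := range assemble([]int{100, 100, 37}, 52, 8) {
		fmt.Printf("%+v\n", m)
	}
}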