@@ -15,6 +15,8 @@ import (
1515 "github.com/DataDog/datadog-process-agent/util"
1616)
1717
18+ const emptyCtrID = ""
19+
1820// Process is a singleton ProcessCheck.
1921var Process = & ProcessCheck {}
2022
@@ -76,28 +78,10 @@ func (p *ProcessCheck) Run(cfg *config.AgentConfig, groupID int32) ([]model.Mess
7678 return nil , nil
7779 }
7880
79- chunkedProcs := fmtProcesses (cfg , procs , p .lastProcs ,
80- ctrList , cpuTimes [0 ], p .lastCPUTime , p .lastRun )
81- // In case we skip every process..
82- if len (chunkedProcs ) == 0 {
83- return nil , nil
84- }
85- groupSize := len (chunkedProcs )
86- chunkedContainers := fmtContainers (ctrList , p .lastCtrRates , p .lastRun , groupSize )
87- messages := make ([]model.MessageBody , 0 , groupSize )
88- totalProcs , totalContainers := float64 (0 ), float64 (0 )
89- for i := 0 ; i < groupSize ; i ++ {
90- totalProcs += float64 (len (chunkedProcs [i ]))
91- totalContainers += float64 (len (chunkedContainers [i ]))
92- messages = append (messages , & model.CollectorProc {
93- HostName : cfg .HostName ,
94- Info : p .sysInfo ,
95- Processes : chunkedProcs [i ],
96- Containers : chunkedContainers [i ],
97- GroupId : groupID ,
98- GroupSize : int32 (groupSize ),
99- })
100- }
81+ procsByCtr := fmtProcesses (cfg , procs , p .lastProcs , ctrList , cpuTimes [0 ], p .lastCPUTime , p .lastRun )
82+ containers := fmtContainers (ctrList , p .lastCtrRates , p .lastRun )
83+
84+ messages , totalProcs , totalContainers := createProcCtrMessages (procsByCtr , containers , cfg , p .sysInfo , groupID )
10185
10286 // Store the last state for comparison on the next run.
10387 // Note: not storing the filtered in case there are new processes that haven't had a chance to show up twice.
@@ -106,28 +90,102 @@ func (p *ProcessCheck) Run(cfg *config.AgentConfig, groupID int32) ([]model.Mess
10690 p .lastCPUTime = cpuTimes [0 ]
10791 p .lastRun = time .Now ()
10892
109- statsd .Client .Gauge ("datadog.process.containers.host_count" , totalContainers , []string {}, 1 )
110- statsd .Client .Gauge ("datadog.process.processes.host_count" , totalProcs , []string {}, 1 )
93+ statsd .Client .Gauge ("datadog.process.containers.host_count" , float64 ( totalContainers ) , []string {}, 1 )
94+ statsd .Client .Gauge ("datadog.process.processes.host_count" , float64 ( totalProcs ) , []string {}, 1 )
11195 log .Debugf ("collected processes in %s" , time .Now ().Sub (start ))
11296 return messages , nil
11397}
11498
99+ func createProcCtrMessages (
100+ procsByCtr map [string ][]* model.Process ,
101+ containers []* model.Container ,
102+ cfg * config.AgentConfig ,
103+ sysInfo * model.SystemInfo ,
104+ groupID int32 ,
105+ ) ([]model.MessageBody , int , int ) {
106+ totalProcs , totalContainers := 0 , 0
107+ msgs := make ([]* model.CollectorProc , 0 )
108+
109+ // we first split non-container processes in chunks
110+ chunks := chunkProcesses (procsByCtr [emptyCtrID ], cfg .MaxPerMessage )
111+ for _ , c := range chunks {
112+ msgs = append (msgs , & model.CollectorProc {
113+ HostName : cfg .HostName ,
114+ Info : sysInfo ,
115+ Processes : c ,
116+ GroupId : groupID ,
117+ })
118+ }
119+
120+ ctrProcs := make ([]* model.Process , 0 )
121+ ctrs := make ([]* model.Container , 0 , len (containers ))
122+ for _ , ctr := range containers {
123+ if procs , ok := procsByCtr [ctr .Id ]; ok {
124+ ctrProcs = append (ctrProcs , procs ... )
125+ }
126+ ctrs = append (ctrs , ctr )
127+ }
128+
129+ if len (ctrs ) > 0 {
130+ msgs = append (msgs , & model.CollectorProc {
131+ HostName : cfg .HostName ,
132+ Info : sysInfo ,
133+ Processes : ctrProcs ,
134+ Containers : ctrs ,
135+ GroupId : groupID ,
136+ })
137+ }
138+
139+ // fill in GroupSize for each CollectorProc and convert them to final messages
140+ // also count containers and processes
141+ messages := make ([]model.MessageBody , 0 , len (msgs ))
142+ for _ , m := range msgs {
143+ m .GroupSize = int32 (len (msgs ))
144+ messages = append (messages , m )
145+ totalProcs += len (m .Processes )
146+ totalContainers += len (m .Containers )
147+ }
148+
149+ return messages , totalProcs , totalContainers
150+ }
151+
152+ // chunkProcesses split non-container processes into chunks and return a list of chunks
153+ func chunkProcesses (procs []* model.Process , size int ) [][]* model.Process {
154+ chunkCount := len (procs ) / size
155+ if chunkCount * size < len (procs ) {
156+ chunkCount ++
157+ }
158+ chunks := make ([][]* model.Process , 0 , chunkCount )
159+
160+ for i := 0 ; i < len (procs ); i += size {
161+ end := i + size
162+ if end > len (procs ) {
163+ end = len (procs )
164+ }
165+ chunks = append (chunks , procs [i :end ])
166+ }
167+
168+ return chunks
169+ }
170+
171+ // fmtProcesses goes through each process, converts them to process object and group them by containers
172+ // non-container processes would be in a single group with key as empty string ""
115173func fmtProcesses (
116174 cfg * config.AgentConfig ,
117175 procs , lastProcs map [int32 ]* process.FilledProcess ,
118176 ctrList []* containers.Container ,
119177 syst2 , syst1 cpu.TimesStat ,
120178 lastRun time.Time ,
121- ) [ ][]* model.Process {
179+ ) map [ string ][]* model.Process {
122180 cidByPid := make (map [int32 ]string , len (ctrList ))
123181 for _ , c := range ctrList {
124182 for _ , p := range c .Pids {
125183 cidByPid [p ] = c .ID
126184 }
127185 }
128186
129- chunked := make ([ ][]* model.Process , 0 )
130- chunk := make ([] * model. Process , 0 , cfg . MaxPerMessage )
187+ procsByCtr := make (map [ string ][]* model.Process )
188+
131189 for _ , fp := range procs {
132190 if skipProcess (cfg , fp , lastProcs ) {
133191 continue
@@ -136,7 +194,7 @@ func fmtProcesses(
136194 // Hide blacklisted args if the Scrubber is enabled
137195 fp .Cmdline = cfg .Scrubber .ScrubProcessCommand (fp )
138196
139- chunk = append ( chunk , & model.Process {
197+ proc := & model.Process {
140198 Pid : fp .Pid ,
141199 Command : formatCommand (fp ),
142200 User : formatUser (fp ),
@@ -149,17 +207,17 @@ func fmtProcesses(
149207 VoluntaryCtxSwitches : uint64 (fp .CtxSwitches .Voluntary ),
150208 InvoluntaryCtxSwitches : uint64 (fp .CtxSwitches .Involuntary ),
151209 ContainerId : cidByPid [fp .Pid ],
152- })
153- if len (chunk ) == cfg .MaxPerMessage {
154- chunked = append (chunked , chunk )
155- chunk = make ([]* model.Process , 0 , cfg .MaxPerMessage )
156210 }
211+ _ , ok := procsByCtr [proc .ContainerId ]
212+ if ! ok {
213+ procsByCtr [proc .ContainerId ] = make ([]* model.Process , 0 )
214+ }
215+ procsByCtr [proc .ContainerId ] = append (procsByCtr [proc .ContainerId ], proc )
157216 }
158- if len (chunk ) > 0 {
159- chunked = append (chunked , chunk )
160- }
217+
161218 cfg .Scrubber .IncrementCacheAge ()
162- return chunked
219+
220+ return procsByCtr
163221}
164222
165223func formatCommand (fp * process.FilledProcess ) * model.Command {
0 commit comments