Skip to content

Commit 620255e

Browse files
committed
chore(inputs.opcua): Add trace logging for gather cycle timing diagnostics
Add Tracef instrumentation throughout the OPC UA read client gather cycle to help diagnose performance issues where collection exceeds the configured interval. The trace logs cover connection checks, PLC read request/response timing, node value updates, metric construction, and accumulator delivery, making it straightforward to identify whether delays originate from the PLC or from Telegraf processing.
1 parent 4c98036 commit 620255e

2 files changed

Lines changed: 26 additions & 0 deletions

File tree

plugins/inputs/opcua/opcua.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ func (o *OpcUA) Init() (err error) {
3535
}
3636

3737
func (o *OpcUA) Gather(acc telegraf.Accumulator) error {
38+
gatherStart := time.Now()
39+
o.Log.Tracef("Gather starting for %d nodes...", len(o.client.NodeIDs))
40+
3841
// Force reconnection every time if a threshold is 0
3942
if o.client.ReconnectErrorThreshold == 0 {
4043
o.client.forceReconnect = true
@@ -44,6 +47,8 @@ func (o *OpcUA) Gather(acc telegraf.Accumulator) error {
4447
metrics, err := o.client.currentValues()
4548
if err != nil {
4649
o.consecutiveErrors++
50+
o.Log.Tracef("Gather failed after %s: %v (consecutive errors: %d)",
51+
time.Since(gatherStart), err, o.consecutiveErrors)
4752

4853
// Force reconnection based on an error threshold: if threshold > 0, reconnect after
4954
// reaching the specified number of consecutive errors; if a threshold = 0, we already
@@ -57,10 +62,13 @@ func (o *OpcUA) Gather(acc telegraf.Accumulator) error {
5762
// Reset error counter on success
5863
o.consecutiveErrors = 0
5964

65+
addStart := time.Now()
6066
// Parse the resulting data into metrics
6167
for _, m := range metrics {
6268
acc.AddMetric(m)
6369
}
70+
o.Log.Tracef("Gather complete: %d metrics added to accumulator in %s, total gather time %s",
71+
len(metrics), time.Since(addStart), time.Since(gatherStart))
6472
return nil
6573
}
6674

plugins/inputs/opcua/read_client.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,15 +141,19 @@ func (o *readClient) ensureConnected() error {
141141
}
142142

143143
func (o *readClient) currentValues() ([]telegraf.Metric, error) {
144+
connectStart := time.Now()
144145
if err := o.ensureConnected(); err != nil {
145146
return nil, err
146147
}
148+
o.Log.Tracef("Connection check took %s", time.Since(connectStart))
147149

148150
if state := o.State(); state != opcua.Connected {
149151
return nil, fmt.Errorf("not connected, in state %q", state)
150152
}
151153

154+
readStart := time.Now()
152155
if err := o.read(); err != nil {
156+
o.Log.Tracef("Read failed after %s: %v", time.Since(readStart), err)
153157
// We do not return the disconnect error, as this would mask the
154158
// original problem, but we do log it
155159
if derr := o.Disconnect(context.Background()); derr != nil {
@@ -158,16 +162,22 @@ func (o *readClient) currentValues() ([]telegraf.Metric, error) {
158162

159163
return nil, err
160164
}
165+
o.Log.Tracef("OPC UA read of %d nodes took %s", len(o.NodeIDs), time.Since(readStart))
161166

167+
metricStart := time.Now()
162168
metrics := make([]telegraf.Metric, 0, len(o.NodeMetricMapping))
169+
var skipped int
163170
// Parse the resulting data into metrics
164171
for i := range o.NodeIDs {
165172
if !o.StatusCodeOK(o.LastReceivedData[i].Quality) {
173+
skipped++
166174
continue
167175
}
168176

169177
metrics = append(metrics, o.MetricForNode(i))
170178
}
179+
o.Log.Tracef("Metric construction took %s (%d metrics, %d skipped due to bad quality)",
180+
time.Since(metricStart), len(metrics), skipped)
171181

172182
return metrics, nil
173183
}
@@ -185,18 +195,26 @@ func (o *readClient) read() error {
185195
count++
186196

187197
// Try to update the values for all registered nodes
198+
o.Log.Tracef("Sending OPC UA read request for %d %s nodes (attempt %d)...",
199+
len(o.reqIDs), nodeTypeLabel(o.Workarounds.UseUnregisteredReads), count)
200+
requestStart := time.Now()
188201
resp, err := o.Client.Read(o.ctx, req)
202+
plcDuration := time.Since(requestStart)
189203
if err == nil {
190204
// Success, update the node values and exit
191205
o.ReadSuccess.Incr(1)
192206
o.forceReconnect = false
207+
o.Log.Tracef("OPC UA read response received in %s, updating %d node values...", plcDuration, len(resp.Results))
208+
updateStart := time.Now()
193209
for i, d := range resp.Results {
194210
o.UpdateNodeValue(i, d)
195211
}
212+
o.Log.Tracef("Node value update took %s", time.Since(updateStart))
196213
return nil
197214
}
198215

199216
o.ReadError.Incr(1)
217+
o.Log.Tracef("OPC UA read request failed after %s: %v", plcDuration, err)
200218

201219
isSessionError := errors.Is(err, ua.StatusBadSessionIDInvalid) ||
202220
errors.Is(err, ua.StatusBadSessionNotActivated) ||

0 commit comments

Comments
 (0)