Skip to content

Commit 8fff658

Browse files
committed
BF: CS-1940 dbwriter reports an error when parsing accounting records around midnight
1 parent 49fb2bc commit 8fff658

3 files changed

Lines changed: 69 additions & 61 deletions

File tree

source/daemons/qmaster/sge_rusage.cc

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
#include "msg_qmaster.h"
6161

6262
#define ACTFILE_FPRINTF_FORMAT \
63-
"%s%c%s%c%s%c%s%c%s%c" sge_u32 "%c%s%c" sge_u32 "%c" sge_u64 "%c" sge_u64 "%c" sge_u64 "%c" sge_u32 "%c" sge_u32 "%c" \
63+
"%s%c%s%c%s%c%s%c%s%c" sge_u32 "%c%s%c" sge_u32 "%c" sge_u64 "%c" sge_u64 "%c" sge_u64 "%c" sge_u32 "%c%d%c" \
6464
sge_u32 "%c%f%c%f%c%f%c" sge_u32 "%c" sge_u32 "%c" sge_u32 "%c" sge_u32 "%c" sge_u32 "%c" sge_u32 "%c" sge_u32 "%c%f%c" \
6565
sge_u32 "%c" sge_u32 "%c" sge_u32 "%c" sge_u32 "%c" sge_u32 "%c" sge_u32 "%c%s%c%s%c%s%c%d%c" sge_u32 "%c%f%c%f%c%f%c%s%c%f%c%s%c%f%c" sge_u32 "%c" sge_u64 "" \
6666
"\n"
@@ -74,7 +74,7 @@ sge_u32 "%c" sge_u32 "%c" sge_u32 "%c" sge_u32 "%c" sge_u32 "%c" sge_u32 "%c%s%c
7474
*
7575
* SYNOPSIS
7676
* static uint32_t
77-
* reporting_get_ulong_usage(const lList *usage_list, lList *reported_list,
77+
* reporting_get_ulong_usage(const lList *usage_list, lList *reported_list,
7878
* const char *name, const char *rname, uint32_t def)
7979
*
8080
* FUNCTION
@@ -84,10 +84,10 @@ sge_u32 "%c" sge_u32 "%c" sge_u32 "%c" sge_u32 "%c" sge_u32 "%c" sge_u32 "%c%s%c
8484
* If no usage information is available for the given attribute, a default
8585
* value will be returned.
8686
*
87-
* name and rname may differ, as already reported usage is taken from job
87+
* name and rname may differ, as already reported usage is taken from job
8888
* online usage, e.g. attr USAGE_ATTR_CPU, whereas the final usage
8989
* is reported in the attr USAGE_ATTR_CPU_ACCT. When we report final usage,
90-
* we take the usage given by USAGE_ATTR_CPU_ACCT, but have to subtract
90+
* we take the usage given by USAGE_ATTR_CPU_ACCT, but have to subtract
9191
* already reported usage coming from online usage USAGE_ATTR_CPU.
9292
*
9393
* INPUTS
@@ -102,7 +102,7 @@ sge_u32 "%c" sge_u32 "%c" sge_u32 "%c" sge_u32 "%c" sge_u32 "%c" sge_u32 "%c%s%c
102102
* static uint32_t - the usage
103103
*
104104
* NOTES
105-
* MT-NOTE: reporting_get_ulong_usage() is MT safe
105+
* MT-NOTE: reporting_get_ulong_usage() is MT safe
106106
*
107107
* SEE ALSO
108108
* sgeobj/usage/usage_list_get_ulong_usage()
@@ -122,7 +122,7 @@ reporting_get_ulong_usage(const lList *usage_list, lList *reported_list,
122122
/* after this action, we'll have reported the total usage */
123123
usage_list_set_ulong_usage(reported_list, rname, usage);
124124

125-
/* in this intermediate accounting record, we'll report the usage
125+
/* in this intermediate accounting record, we'll report the usage
126126
* consumed since the last intermediate accounting record.
127127
*/
128128
usage -= reported;
@@ -152,10 +152,10 @@ reporting_get_ulong_usage(const lList *usage_list, lList *reported_list,
152152
* If reported_usage is nullptr, no usage will be booked as already reported,
153153
* e.g. for maximum values.
154154
*
155-
* name and rname may differ, as already reported usage is taken from job
155+
* name and rname may differ, as already reported usage is taken from job
156156
* online usage, e.g. attr USAGE_ATTR_CPU, whereas the final usage
157157
* is reported in the attr USAGE_ATTR_CPU_ACCT. When we report final usage,
158-
* we take the usage given by USAGE_ATTR_CPU_ACCT, but have to subtract
158+
* we take the usage given by USAGE_ATTR_CPU_ACCT, but have to subtract
159159
* already reported usage coming from online usage USAGE_ATTR_CPU.
160160
*
161161
* INPUTS
@@ -172,7 +172,7 @@ reporting_get_ulong_usage(const lList *usage_list, lList *reported_list,
172172
* static uint32_t - the usage
173173
*
174174
* NOTES
175-
* MT-NOTE: reporting_get_ulong_usage_sum() is MT safe
175+
* MT-NOTE: reporting_get_ulong_usage_sum() is MT safe
176176
*
177177
* SEE ALSO
178178
* sge_rusage/reporting_get_ulong_usage()
@@ -208,8 +208,8 @@ reporting_get_ulong_usage_sum(const lList *usage_list, lList *reported_list, boo
208208
*
209209
* SYNOPSIS
210210
* static double
211-
* reporting_get_double_usage(const lList *usage_list, lList *reported_list,
212-
* const char *name, const char *rname, double def)
211+
* reporting_get_double_usage(const lList *usage_list, lList *reported_list,
212+
* const char *name, const char *rname, double def)
213213
*
214214
* FUNCTION
215215
* Return the usage information of a certain attribute (e.g. cpu, mem, ...).
@@ -218,10 +218,10 @@ reporting_get_ulong_usage_sum(const lList *usage_list, lList *reported_list, boo
218218
* If no usage information is available for the given attribute, a default
219219
* value will be returned.
220220
*
221-
* name and rname may differ, as already reported usage is taken from job
221+
* name and rname may differ, as already reported usage is taken from job
222222
* online usage, e.g. attr USAGE_ATTR_CPU, whereas the final usage
223223
* is reported in the attr USAGE_ATTR_CPU_ACCT. When we report final usage,
224-
* we take the usage given by USAGE_ATTR_CPU_ACCT, but have to subtract
224+
* we take the usage given by USAGE_ATTR_CPU_ACCT, but have to subtract
225225
* already reported usage coming from online usage USAGE_ATTR_CPU.
226226
*
227227
* INPUTS
@@ -236,7 +236,7 @@ reporting_get_ulong_usage_sum(const lList *usage_list, lList *reported_list, boo
236236
* static double - the usage
237237
*
238238
* NOTES
239-
* MT-NOTE: reporting_get_double_usage() is MT safe
239+
* MT-NOTE: reporting_get_double_usage() is MT safe
240240
*
241241
* SEE ALSO
242242
* sgeobj/usage/usage_list_get_double_usage()
@@ -256,7 +256,7 @@ reporting_get_double_usage(const lList *usage_list, lList *reported_list, const
256256
/* after this action, we'll have reported the total usage */
257257
usage_list_set_double_usage(reported_list, rname, usage);
258258

259-
/* in this intermediate accounting record, we'll report the usage
259+
/* in this intermediate accounting record, we'll report the usage
260260
* consumed since the last intermediate accounting record.
261261
*/
262262
usage -= reported;
@@ -286,10 +286,10 @@ reporting_get_double_usage(const lList *usage_list, lList *reported_list, const
286286
* If reported_usage is nullptr, no usage will be booked as already reported,
287287
* e.g. for maximum values.
288288
*
289-
* name and rname may differ, as already reported usage is taken from job
289+
* name and rname may differ, as already reported usage is taken from job
290290
* online usage, e.g. attr USAGE_ATTR_CPU, whereas the final usage
291291
* is reported in the attr USAGE_ATTR_CPU_ACCT. When we report final usage,
292-
* we take the usage given by USAGE_ATTR_CPU_ACCT, but have to subtract
292+
* we take the usage given by USAGE_ATTR_CPU_ACCT, but have to subtract
293293
* already reported usage coming from online usage USAGE_ATTR_CPU.
294294
*
295295
* INPUTS
@@ -306,7 +306,7 @@ reporting_get_double_usage(const lList *usage_list, lList *reported_list, const
306306
* static double - the usage
307307
*
308308
* NOTES
309-
* MT-NOTE: reporting_get_double_usage_sum() is MT safe
309+
* MT-NOTE: reporting_get_double_usage_sum() is MT safe
310310
*
311311
* SEE ALSO
312312
* sge_rusage/reporting_get_double_usage()
@@ -374,7 +374,7 @@ sge_write_rusage(dstring *buffer, rapidjson::Writer<rapidjson::StringBuffer> *wr
374374
uint64_t now = sge_get_gmt64();
375375
uint32_t ar_id = 0;
376376
lListElem *ar = nullptr;
377-
uint32_t exit_status = 0;
377+
int exit_status = 0;
378378
bool do_accounting_summary = false;
379379
const lList *master_pe_list = *ocs::DataStore::get_master_list(SGE_TYPE_PE);
380380
const lList *master_ar_list = *ocs::DataStore::get_master_list(SGE_TYPE_AR);
@@ -389,7 +389,7 @@ sge_write_rusage(dstring *buffer, rapidjson::Writer<rapidjson::StringBuffer> *wr
389389
DRETURN(false);
390390
}
391391

392-
/*
392+
/*
393393
* Figure out if it is a parallel job,
394394
* and if we shall write individual accounting entries or a summary.
395395
*/
@@ -429,7 +429,7 @@ sge_write_rusage(dstring *buffer, rapidjson::Writer<rapidjson::StringBuffer> *wr
429429
}
430430

431431
/*
432-
* For intermediate records, we store the reported usage in
432+
* For intermediate records, we store the reported usage in
433433
* ja_task or pe_task reported_list.
434434
*/
435435
if (intermediate) {
@@ -439,7 +439,7 @@ sge_write_rusage(dstring *buffer, rapidjson::Writer<rapidjson::StringBuffer> *wr
439439
reported_list = lGetOrCreateList(ja_task, JAT_reported_usage_list, "reported_usage", UA_Type);
440440
}
441441

442-
/*
442+
/*
443443
* The LAST_INTERMEDIATE timestamp of the previous intermediate
444444
* record is the start_time of the current interval.
445445
*/
@@ -470,13 +470,13 @@ sge_write_rusage(dstring *buffer, rapidjson::Writer<rapidjson::StringBuffer> *wr
470470

471471
if (intermediate) {
472472
/*
473-
* for the job, we don't have the submission time in the job report
474-
* before job exit
473+
* for the job, we don't have the submission time in the job report
474+
* before job exit
475475
*/
476476
if (job != nullptr && pe_task == nullptr) {
477477
submission_time = lGetUlong64(job, JB_submission_time);
478478
}
479-
/*
479+
/*
480480
* For the first intermediate record, the start_time is the ja_task start time.
481481
* For subsequent intermediate records, we already set the start_time to the
482482
* previous intermediate record's end time.
@@ -500,10 +500,10 @@ sge_write_rusage(dstring *buffer, rapidjson::Writer<rapidjson::StringBuffer> *wr
500500
* In this case set exit_status to -1, meaning in ARCo: Job still running.
501501
* See CR 6621482.
502502
*/
503-
exit_status = usage_list_get_ulong_usage(usage_list, "exit_status", -1);
503+
exit_status = usage_list_get_int_usage(usage_list, "exit_status", -1);
504504
} else {
505505
start_time = usage_list_get_ulong64_usage(usage_list, "start_time", 0);
506-
exit_status = usage_list_get_ulong_usage(usage_list, "exit_status", 0);
506+
exit_status = usage_list_get_int_usage(usage_list, "exit_status", 0);
507507
}
508508

509509
ar_id = lGetUlong(job, JB_ar);

source/libs/sgeobj/sge_usage.cc

Lines changed: 39 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,33 @@
11
/*___INFO__MARK_BEGIN__*/
22
/*************************************************************************
3-
*
3+
*
44
* The Contents of this file are made available subject to the terms of
55
* the Sun Industry Standards Source License Version 1.2
6-
*
6+
*
77
* Sun Microsystems Inc., March, 2001
8-
*
9-
*
8+
*
9+
*
1010
* Sun Industry Standards Source License Version 1.2
1111
* =================================================
1212
* The contents of this file are subject to the Sun Industry Standards
1313
* Source License Version 1.2 (the "License"); You may not use this file
1414
* except in compliance with the License. You may obtain a copy of the
1515
* License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html
16-
*
16+
*
1717
* Software provided under this License is provided on an "AS IS" basis,
1818
* WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
1919
* WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
2020
* MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
2121
* See the License for the specific provisions governing your rights and
2222
* obligations concerning the Software.
23-
*
23+
*
2424
* The Initial Developer of the Original Code is: Sun Microsystems, Inc.
25-
*
25+
*
2626
* Copyright: 2001 by Sun Microsystems, Inc.
27-
*
27+
*
2828
* All Rights Reserved.
29-
*
30-
* Portions of this software are Copyright (c) 2023-2026 HPC-Gridware GmbH
29+
*
30+
* Portions of this software are Copyright (c) 2023-2024,2026 HPC-Gridware GmbH
3131
*
3232
************************************************************************/
3333
/*___INFO__MARK_END__*/
@@ -45,12 +45,12 @@
4545
*
4646
* SYNOPSIS
4747
* uint32_t
48-
* usage_list_get_ulong_usage(const lList *usage_list, const char *name,
48+
* usage_list_get_ulong_usage(const lList *usage_list, const char *name,
4949
* uint32_t def)
5050
*
5151
* FUNCTION
52-
* Searches a usage object with the given name in the given usage
53-
* list. If such an element is found, returns the value of the
52+
* Searches a usage object with the given name in the given usage
53+
* list. If such an element is found, returns the value of the
5454
* usage object as uint32_t value.
5555
* If no such element is found, return the given default value.
5656
*
@@ -65,25 +65,31 @@
6565
* SEE ALSO
6666
* gdi/usage/usage_list_get_double_usage()
6767
*******************************************************************************/
68+
int
69+
usage_list_get_int_usage(const lList *usage_list, const char *name, int def) {
70+
const lListElem *ep = lGetElemStr(usage_list, UA_name, name);
71+
if (ep != nullptr) {
72+
return static_cast<int>(lGetDouble(ep, UA_value));
73+
} else {
74+
return def;
75+
}
76+
}
77+
6878
uint32_t
69-
usage_list_get_ulong_usage(const lList *usage_list, const char *name,
70-
uint32_t def)
71-
{
79+
usage_list_get_ulong_usage(const lList *usage_list, const char *name, uint32_t def) {
7280
const lListElem *ep = lGetElemStr(usage_list, UA_name, name);
7381
if (ep != nullptr) {
74-
return (uint32_t)lGetDouble(ep, UA_value);
82+
return static_cast<uint32_t>(lGetDouble(ep, UA_value));
7583
} else {
7684
return def;
7785
}
7886
}
7987

8088
uint64_t
81-
usage_list_get_ulong64_usage(const lList *usage_list, const char *name,
82-
uint64_t def)
83-
{
89+
usage_list_get_ulong64_usage(const lList *usage_list, const char *name, uint64_t def) {
8490
const lListElem *ep = lGetElemStr(usage_list, UA_name, name);
8591
if (ep != nullptr) {
86-
return (uint64_t)lGetDouble(ep, UA_value);
92+
return static_cast<uint64_t>(lGetDouble(ep, UA_value));
8793
} else {
8894
return def;
8995
}
@@ -95,12 +101,12 @@ usage_list_get_ulong64_usage(const lList *usage_list, const char *name,
95101
*
96102
* SYNOPSIS
97103
* double
98-
* usage_list_get_double_usage(const lList *usage_list, const char *name,
99-
* double def)
104+
* usage_list_get_double_usage(const lList *usage_list, const char *name,
105+
* double def)
100106
*
101107
* FUNCTION
102-
* Searches a usage object with the given name in the given usage
103-
* list. If such an element is found, returns the value of the
108+
* Searches a usage object with the given name in the given usage
109+
* list. If such an element is found, returns the value of the
104110
* usage object as double value.
105111
* If no such element is found, return the given default value.
106112
*
@@ -133,7 +139,7 @@ usage_list_get_double_usage(const lList *usage_list, const char *name,
133139
*
134140
* SYNOPSIS
135141
* void
136-
* usage_list_set_ulong_usage(lList *usage_list, const char *name,
142+
* usage_list_set_ulong_usage(lList *usage_list, const char *name,
137143
* uint32_t value)
138144
*
139145
* FUNCTION
@@ -146,7 +152,7 @@ usage_list_get_double_usage(const lList *usage_list, const char *name,
146152
* uint32_t value - the new value
147153
*
148154
* NOTES
149-
* MT-NOTE: usage_list_set_ulong_usage() is MT safe
155+
* MT-NOTE: usage_list_set_ulong_usage() is MT safe
150156
*
151157
* SEE ALSO
152158
* sgeobj/usage/usage_list_set_double_usage()
@@ -171,8 +177,8 @@ usage_list_set_ulong64_usage(lList *usage_list, const char *name, uint64_t value
171177
*
172178
* SYNOPSIS
173179
* void
174-
* usage_list_set_double_usage(lList *usage_list, const char *name,
175-
* double value)
180+
* usage_list_set_double_usage(lList *usage_list, const char *name,
181+
* double value)
176182
*
177183
* FUNCTION
178184
* Updates the value of a usage record. If no usage record exists with the
@@ -185,7 +191,7 @@ usage_list_set_ulong64_usage(lList *usage_list, const char *name, uint64_t value
185191
* bool create_usage - create the usage element if it does not exist? Default: yes.
186192
*
187193
* NOTES
188-
* MT-NOTE: usage_list_set_double_usage() is MT safe
194+
* MT-NOTE: usage_list_set_double_usage() is MT safe
189195
*
190196
* SEE ALSO
191197
* sgeobj/usage/usage_list_set_ulong_usage()
@@ -236,8 +242,8 @@ usage_list_max_double_usage(lList *usage_list, const char *name, double value, b
236242
* usage_list_sum() -- sum up usage of two lists
237243
*
238244
* SYNOPSIS
239-
* void
240-
* usage_list_sum(lList *usage_list, const lList *add_usage_list)
245+
* void
246+
* usage_list_sum(lList *usage_list, const lList *add_usage_list)
241247
*
242248
* FUNCTION
243249
* Add the usage reported in add_usage_list to usage_list.
@@ -255,7 +261,7 @@ usage_list_max_double_usage(lList *usage_list, const char *name, double value, b
255261
* const lList *add_usage_list - usage to add to usage_list
256262
*
257263
* NOTES
258-
* MT-NOTE: usage_list_sum() is MT safe
264+
* MT-NOTE: usage_list_sum() is MT safe
259265
*******************************************************************************/
260266
void
261267
usage_list_sum(lList *usage_list, const lList *add_usage_list)

source/libs/sgeobj/sge_usage.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ enum {
4444

4545
/*
4646
* sge standard usage value names
47-
*
47+
*
4848
* use these defined names for referring to special usage values
4949
*/
5050

@@ -82,6 +82,8 @@ enum {
8282
#define USAGE_ATTR_RSS "rss"
8383
#define USAGE_ATTR_MAXRSS "maxrss"
8484

85+
int
86+
usage_list_get_int_usage(const lList *usage_list, const char *name, int def);
8587
uint32_t
8688
usage_list_get_ulong_usage(const lList *usage_list, const char *name, uint32_t def);
8789
uint64_t

0 commit comments

Comments
 (0)