Skip to content

Commit 2b063c5

Browse files
committed
EH: CS-2064: sge_do_log: support log file rotation with persistent fd
1 parent 5f118f0 commit 2b063c5

1 file changed

Lines changed: 40 additions & 14 deletions

File tree

source/libs/uti/sge_log.cc

Lines changed: 40 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include <fcntl.h>
3838
#include <fnmatch.h>
3939
#include <pthread.h>
40+
#include <sys/stat.h>
4041

4142
#include "uti/msg_utilib.h"
4243
#include "uti/sge_dstring.h"
@@ -58,9 +59,11 @@ typedef struct {
5859
int log_level;
5960
int log_as_admin_user;
6061
int verbose;
62+
int log_fd; ///< persistent log file descriptor; -1 when not open
63+
uint64_t last_inode_check; ///< gmt64 timestamp of last rotation inode check
6164
} log_state_t;
6265

63-
static log_state_t Log_State = {PTHREAD_MUTEX_INITIALIZER, TMP_ERR_FILE_SNBU, LOG_WARNING, 0, 1} ;
66+
static log_state_t Log_State = {PTHREAD_MUTEX_INITIALIZER, TMP_ERR_FILE_SNBU, LOG_WARNING, 0, 1, -1, 0};
6467

6568
static void
6669
sge_do_log(uint32_t prog_number, const char *prog_or_thread_name, int thread_id,
@@ -73,22 +76,40 @@ sge_do_log(uint32_t prog_number, const char *prog_or_thread_name, int thread_id,
7376
}
7477

7578
if (prog_number == QMASTER || prog_number == EXECD || prog_number == SCHEDD || prog_number == SHADOWD) {
76-
int fd = SGE_OPEN3(log_state_get_log_file(), O_WRONLY | O_APPEND | O_CREAT, 0666);
77-
if (fd >= 0) {
78-
// initialize static dstring
79-
DSTRING_STATIC(msg_dstr, 4 * MAX_STRING_SIZE);
79+
// format the log line before acquiring the lock to minimise lock hold time
80+
DSTRING_STATIC(msg_dstr, 4 * MAX_STRING_SIZE);
81+
uint64_t now = sge_get_gmt64();
82+
sge_ctime64(now, &msg_dstr, false, true);
83+
const char *msg_str = sge_dstring_sprintf_append(&msg_dstr, "|%12.12s|%02d|%s|%c|%s\n", prog_or_thread_name, thread_id, unqualified_hostname, level, msg);
84+
const ssize_t len = sge_dstring_strlen(&msg_dstr);
8085

81-
// write log message to dstring
82-
sge_ctime64(sge_get_gmt64(), &msg_dstr, false, true);
83-
const char *msg_str = sge_dstring_sprintf_append(&msg_dstr, "|%12.12s|%02d|%s|%c|%s\n", prog_or_thread_name, thread_id, unqualified_hostname, level, msg);
86+
sge_mutex_lock("Log_State_Lock", __func__, __LINE__, &Log_State.mutex);
8487

85-
// write the buffer to file
86-
ssize_t len = sge_dstring_strlen(&msg_dstr);
88+
// throttled inode check: detect log rotation at most once every 5 seconds
89+
if (Log_State.log_fd >= 0 && now - Log_State.last_inode_check > 5 * 1000000ULL) {
90+
Log_State.last_inode_check = now;
91+
struct stat st_fd{}, st_path{};
92+
if (fstat(Log_State.log_fd, &st_fd) == 0 && stat(Log_State.log_file, &st_path) == 0 && st_fd.st_ino != st_path.st_ino) {
93+
// log file was rotated — reopen against the new path
94+
close(Log_State.log_fd);
95+
Log_State.log_fd = -1;
96+
}
97+
}
98+
99+
// open once and keep open
100+
if (Log_State.log_fd < 0) {
101+
Log_State.log_fd = SGE_OPEN3(Log_State.log_file, O_WRONLY | O_APPEND | O_CREAT, 0666);
102+
Log_State.last_inode_check = now;
103+
}
104+
const int fd = Log_State.log_fd;
105+
106+
sge_mutex_unlock("Log_State_Lock", __func__, __LINE__, &Log_State.mutex);
107+
108+
// write is outside the lock; O_APPEND makes individual write() calls atomic
109+
if (fd >= 0) {
87110
if (write(fd, msg_str, len) != len) {
88-
// write to stderr if logging failed
89111
fprintf(stderr, "can't log to file %s: %s\n", log_state_get_log_file(), sge_strerror(errno, &msg_dstr));
90112
}
91-
close(fd);
92113
}
93114
}
94115
}
@@ -196,9 +217,14 @@ void log_state_set_log_level(uint32_t theLevel) {
196217
sge_mutex_unlock("Log_State_Lock", __func__, __LINE__, &Log_State.mutex);
197218
}
198219

199-
void log_state_set_log_file(const char *theFile) {
220+
void log_state_set_log_file(const char *file) {
200221
sge_mutex_lock("Log_State_Lock", __func__, __LINE__, &Log_State.mutex);
201-
Log_State.log_file = theFile;
222+
// close the persistent fd so it is re-opened against the new path on next use
223+
if (Log_State.log_fd >= 0) {
224+
close(Log_State.log_fd);
225+
Log_State.log_fd = -1;
226+
}
227+
Log_State.log_file = file;
202228
sge_mutex_unlock("Log_State_Lock", __func__, __LINE__, &Log_State.mutex);
203229
}
204230

0 commit comments

Comments
 (0)