138138 MINUTES ,
139139 HOURS ,
140140 DAYS ,
141+ UPDATE_TOLERATION ,
141142)
142143
143144PGLOG_DIR = "log"
@@ -417,9 +418,12 @@ def waiting_cluster_final_status(
417418 status : kopf .Status ,
418419 logger : logging .Logger ,
419420 timeout : int = MINUTES * 1 ,
420- ) -> None :
421+ except_nodes : int = None ,
422+ ) -> bool :
423+ is_health = True
424+
421425 if spec [ACTION ] == ACTION_STOP :
422- return
426+ return is_health
423427
424428 # waiting for restart
425429 auto_failover_conns = connections (spec , meta , patch ,
@@ -449,6 +453,7 @@ def waiting_cluster_final_status(
449453 if i >= maxtry :
450454 logger .warning (
451455 f"cluster maybe maybe not right. skip waitting." )
456+ is_health = False
452457 break
453458 output = exec_command (conn , primary_cmd , logger , interrupt = False )
454459 if output != '1' :
@@ -475,6 +480,8 @@ def waiting_cluster_final_status(
475480 spec .get (POSTGRESQL ).get (READWRITEINSTANCE ).get (MACHINES )
476481 ) + len (
477482 spec .get (POSTGRESQL ).get (READONLYINSTANCE ).get (MACHINES ))
483+ if except_nodes is not None :
484+ total_nodes = except_nodes
478485 output = exec_command (conn , nodes_cmd , logger , interrupt = False )
479486 if output != str (total_nodes ):
480487 logger .warning (
@@ -484,6 +491,7 @@ def waiting_cluster_final_status(
484491
485492 break
486493 auto_failover_conns .free_conns ()
494+ return is_health
487495
488496
489497def waiting_cluster_correct_status (
@@ -3321,7 +3329,7 @@ def update_antiaffinity(
33213329 timeout : int = MINUTES * 5 ,
33223330) -> None :
33233331 # local volume
3324- if spec .get (SPEC_VOLUME_TYPE ) == SPEC_VOLUME_LOCAL :
3332+ if spec .get (SPEC_VOLUME_TYPE , 'local' ) == SPEC_VOLUME_LOCAL :
33253333 delete_disk = True
33263334 timeout = HOURS * 1
33273335 rolling_update (meta , spec , patch , status , logger , target_roles , exit ,
@@ -3408,6 +3416,8 @@ def update_replicas(
34083416
34093417 need_update_number_sync_standbys = True
34103418
3419+ waiting_cluster_final_status (meta , spec , patch , status , logger , 1 * HOURS )
3420+
34113421 return need_update_number_sync_standbys
34123422
34133423
@@ -4019,6 +4029,48 @@ def local_create_user(OS: List,
40194029 auto_failover_conns .free_conns ()
40204030
40214031
def get_except_nodes(
    meta: kopf.Meta,
    spec: kopf.Spec,
    patch: kopf.Patch,
    status: kopf.Status,
    logger: logging.Logger,
    diffs: kopf.Diff,
) -> int:
    """Return the data-node count derived from the pre-update values in *diffs*.

    The count starts from the read-write and read-only replica numbers that
    ``get_replicas(spec)`` reports.  For every diff entry touching one of the
    replica/machine fields, the matching count is replaced by the entry's
    OLD value (``OLD`` itself for replica fields, ``len(OLD)`` for machine
    lists).

    Args:
        meta, patch, status: not used here; accepted so the signature matches
            the other helpers in this module.
        spec: resource spec passed to ``get_replicas``.
        logger: receives an error when a read-write diff is not a
            ``DIFF_CHANGE`` operation.
        diffs: iterable of entries indexed as (action, field, old, new).

    Returns:
        Sum of the read-write and read-only node counts.
    """
    # Only the last two values of get_replicas() are needed here.
    _, _, readwrite_replicas, readonly_replicas = get_replicas(spec)
    except_readwrite_nodes = readwrite_replicas
    except_readonly_nodes = readonly_replicas

    for diff in diffs:
        AC = diff[0]
        FIELD = diff[1]
        OLD = diff[2]

        if FIELD == DIFF_FIELD_READWRITE_REPLICAS:
            if AC != DIFF_CHANGE:
                logger.error(
                    str(DIFF_FIELD_ACTION) + " only support " + DIFF_CHANGE)
            else:
                except_readwrite_nodes = OLD

        if FIELD == DIFF_FIELD_READWRITE_MACHINES:
            if AC != DIFF_CHANGE:
                logger.error(
                    str(DIFF_FIELD_ACTION) + " only support " + DIFF_CHANGE)
            else:
                except_readwrite_nodes = len(OLD)

        # Read-only diffs must update the read-only counter.  Writing them to
        # the read-write counter (as before) clobbered the read-write count
        # and left the read-only count at its post-update value.
        # NOTE(review): unlike the read-write branches there is no
        # DIFF_CHANGE guard here, so OLD is presumably None for an "add"
        # operation and would break the sum / len() below -- confirm against
        # how kopf reports added fields.
        if FIELD == DIFF_FIELD_READONLY_REPLICAS:
            except_readonly_nodes = OLD

        if FIELD == DIFF_FIELD_READONLY_MACHINES:
            except_readonly_nodes = len(OLD)

    return except_readwrite_nodes + except_readonly_nodes
4072+
4073+
40224074# kubectl patch pg lzzhang --patch '{"spec": {"action": "stop"}}' --type=merge
40234075def update_cluster (
40244076 meta : kopf .Meta ,
@@ -4035,6 +4087,8 @@ def update_cluster(
40354087 check_param (spec , logger , create = False )
40364088 need_roll_update = False
40374089 need_update_number_sync_standbys = False
4090+ update_toleration = spec .get (UPDATE_TOLERATION , False )
4091+ except_nodes = get_except_nodes (meta , spec , patch , status , logger , diffs )
40384092
40394093 for diff in diffs :
40404094 AC = diff [0 ]
@@ -4055,10 +4109,25 @@ def update_cluster(
40554109 OLD = diff [2 ]
40564110 NEW = diff [3 ]
40574111
4112+ if update_toleration == False and waiting_cluster_final_status (meta , spec , patch , status , logger , except_nodes = except_nodes ) == False :
4113+ logger .error (f"cluster status is not health." )
4114+ raise kopf .PermanentError (f"cluster status is not health." )
4115+
40584116 return_update_number_sync_standbys = update_replicas (meta , spec , patch , status , logger , AC , FIELD , OLD ,
40594117 NEW )
40604118 if need_update_number_sync_standbys == False and return_update_number_sync_standbys == True :
40614119 need_update_number_sync_standbys = True
4120+
4121+ for diff in diffs :
4122+ AC = diff [0 ]
4123+ FIELD = diff [1 ]
4124+ OLD = diff [2 ]
4125+ NEW = diff [3 ]
4126+
4127+ if update_toleration == False and waiting_cluster_final_status (meta , spec , patch , status , logger ) == False :
4128+ logger .error (f"cluster status is not health." )
4129+ raise kopf .PermanentError (f"cluster status is not health." )
4130+
40624131 update_podspec_volume (meta , spec , patch , status , logger , AC , FIELD ,
40634132 OLD , NEW )
40644133 if FIELD [0 :len (DIFF_FIELD_SPEC_ANTIAFFINITY
@@ -4077,6 +4146,10 @@ def update_cluster(
40774146 OLD = diff [2 ]
40784147 NEW = diff [3 ]
40794148
4149+ if update_toleration == False and waiting_cluster_final_status (meta , spec , patch , status , logger ) == False :
4150+ logger .error (f"cluster status is not health." )
4151+ raise kopf .PermanentError (f"cluster status is not health." )
4152+
40804153 update_hbas (meta , spec , patch , status , logger , AC , FIELD , OLD , NEW )
40814154 update_users (meta , spec , patch , status , logger , AC , FIELD , OLD ,
40824155 NEW )
0 commit comments