Skip to content

Commit f7e2be6

Browse files
gurghet and claude committed
fix: prevent race condition between on.create and reconcile timer
Add initial_delay=60.0 to the timer and a grace-period check for newly created CRs (less than 90s old with no keyId). This prevents duplicate deploy key creation when both handlers fire before the status is patched.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 6829d82 commit f7e2be6

1 file changed

Lines changed: 15 additions & 3 deletions

File tree

operator.py

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import kubernetes
44
import base64
55
import github
6+
from datetime import datetime, timezone
67
from cryptography.hazmat.primitives import serialization
78
from cryptography.hazmat.primitives.asymmetric import rsa
89
from cryptography.hazmat.backends import default_backend
@@ -372,22 +373,33 @@ def delete_deploy_key(spec, meta, status, logger, **kwargs):
372373

373374
logger.info(f"Secret {meta['name']}-private-key will be deleted by garbage collection")
374375

375-
@kopf.timer('github.com', 'v1alpha1', 'githubdeploykeys', interval=60.0)
376+
@kopf.timer('github.com', 'v1alpha1', 'githubdeploykeys', interval=60.0, initial_delay=60.0)
376377
def reconcile_deploy_key(spec, status, logger, patch, **kwargs):
377378
"""Periodically reconcile the deploy key to ensure it exists."""
378379
github_manager = GitHubKeyManager(logger)
379-
380+
380381
try:
381382
repo = github_manager.get_repository(spec['repository'])
382383
key_id = status.get('keyId') if status else None
383384
base_title = spec.get('title', 'Kubernetes-managed deploy key')
384385
managed_title = f"k8s-operator:{base_title}"
385-
386+
386387
# Note: We no longer delete "stale" keys here. This caused a race condition where
387388
# a newly created key (not yet in status) would be deleted as stale.
388389
# Key cleanup is handled by create_deploy_key via delete_keys_by_title.
389390

390391
if not key_id:
392+
# Check if CR was recently created - if so, let on.create handle it
393+
# This prevents a race condition where both on.create and reconcile timer
394+
# fire simultaneously on new CRs before status is patched
395+
creation_time_str = kwargs['body']['metadata']['creationTimestamp']
396+
creation_time = datetime.fromisoformat(creation_time_str.replace('Z', '+00:00'))
397+
age_seconds = (datetime.now(timezone.utc) - creation_time).total_seconds()
398+
399+
if age_seconds < 90:
400+
logger.info(f"CR created {age_seconds:.0f}s ago with no keyId - letting on.create handle it")
401+
return
402+
391403
logger.info("No key ID in status, recreating deploy key")
392404
create_deploy_key(spec, status, logger, patch, force=True, **kwargs)
393405
return

0 commit comments

Comments
 (0)