|
45 | 45 | import asyncio |
46 | 46 | import binascii |
47 | 47 | from datetime import datetime, timezone |
| 48 | +import json |
48 | 49 | import logging |
49 | 50 | import mimetypes |
50 | 51 | import os |
| 52 | +import socket |
51 | 53 | import ssl |
52 | 54 | import tempfile |
53 | 55 | import time |
@@ -471,15 +473,17 @@ def __init__(self) -> None: |
471 | 473 |
|
472 | 474 | # Leader election settings from config |
473 | 475 | if self.redis_url and REDIS_AVAILABLE: |
474 | | - self._instance_id = str(uuid.uuid4()) # Unique ID for this process |
| 476 | + # Metadata for this instance |
| 477 | + self._instance_metadata = {"instance_id": str(uuid.uuid4()), "port": settings.port, "pid": os.getpid(), "hostname": socket.gethostname()} |
| 478 | + self._instance_id = self._instance_metadata["instance_id"] |
475 | 479 | self._leader_key = settings.redis_leader_key |
476 | 480 | self._leader_ttl = settings.redis_leader_ttl |
477 | 481 | self._leader_heartbeat_interval = settings.redis_leader_heartbeat_interval |
478 | 482 | self._leader_heartbeat_task: Optional[asyncio.Task] = None |
479 | 483 | self._follower_election_task: Optional[asyncio.Task] = None |
480 | 484 |
|
481 | | - # Log instance mapping for debugging |
482 | | - logger.info(f"Instance started: instance_id={self._instance_id}, port={settings.port}, pid={os.getpid()}") |
| 485 | + # Log instance mapping |
| 486 | + logger.info(f"Instance started: {json.dumps(self._instance_metadata)}") |
483 | 487 |
|
484 | 488 | # Always initialize file lock as fallback (used if Redis connection fails at runtime) |
485 | 489 | if settings.cache_type != "none": |
@@ -587,7 +591,8 @@ async def initialize(self) -> None: |
587 | 591 | except Exception as e: |
588 | 592 | raise ConnectionError(f"Redis ping failed: {e}") from e |
589 | 593 |
|
590 | | - is_leader = await self._redis_client.set(self._leader_key, self._instance_id, ex=self._leader_ttl, nx=True) |
| 594 | + # Store all instance metadate in redis |
| 595 | + is_leader = await self._redis_client.set(self._leader_key, json.dumps(self._instance_metadata), ex=self._leader_ttl, nx=True) |
591 | 596 | if is_leader: |
592 | 597 | logger.info("Acquired Redis leadership. Starting health check and heartbeat tasks.") |
593 | 598 | self._health_check_task = asyncio.create_task(self._run_health_checks(user_email)) |
@@ -653,7 +658,7 @@ async def shutdown(self) -> None: |
653 | 658 | return 0 |
654 | 659 | end |
655 | 660 | """ |
656 | | - result = await self._redis_client.eval(release_script, 1, self._leader_key, self._instance_id) |
| 661 | + result = await self._redis_client.eval(release_script, 1, self._leader_key, json.dumps(self._instance_metadata)) |
657 | 662 | if result: |
658 | 663 | logger.info("Released Redis leadership on shutdown") |
659 | 664 | except Exception as e: |
@@ -3941,8 +3946,18 @@ async def _run_leader_heartbeat(self) -> None: |
3941 | 3946 | continue |
3942 | 3947 |
|
3943 | 3948 | # Check if we're still the leader |
3944 | | - current_leader = await self._redis_client.get(self._leader_key) |
3945 | | - if current_leader != self._instance_id: |
| 3949 | + current_leader_raw = await self._redis_client.get(self._leader_key) |
| 3950 | + if current_leader_raw: |
| 3951 | + try: |
| 3952 | + current_leader_data = json.loads(current_leader_raw) |
| 3953 | + current_leader_id = current_leader_data.get("instance_id") |
| 3954 | + except (json.JSONDecodeError, AttributeError): |
| 3955 | + # Fallback for old UUID-only format |
| 3956 | + current_leader_id = current_leader_raw |
| 3957 | + else: |
| 3958 | + current_leader_id = None |
| 3959 | + |
| 3960 | + if current_leader_id != self._instance_id: |
3946 | 3961 | logger.info("Lost Redis leadership, stopping heartbeat") |
3947 | 3962 | self._start_follower_election() |
3948 | 3963 | return |
@@ -3985,7 +4000,7 @@ async def _run_follower_election(self, user_email: str) -> None: |
3985 | 4000 | continue |
3986 | 4001 |
|
3987 | 4002 | # Attempt to acquire leadership |
3988 | | - is_leader = await self._redis_client.set(self._leader_key, self._instance_id, ex=self._leader_ttl, nx=True) |
| 4003 | + is_leader = await self._redis_client.set(self._leader_key, json.dumps(self._instance_metadata), ex=self._leader_ttl, nx=True) |
3989 | 4004 |
|
3990 | 4005 | if is_leader: |
3991 | 4006 | logger.info("Acquired Redis leadership via follower election. Starting health check and heartbeat.") |
@@ -4034,8 +4049,18 @@ async def _run_health_checks(self, user_email: str) -> None: |
4034 | 4049 | if self._redis_client and settings.cache_type == "redis": |
4035 | 4050 | # Redis-based leader check (async, decode_responses=True returns strings) |
4036 | 4051 | # Note: Leader key TTL refresh is handled by _run_leader_heartbeat task |
4037 | | - current_leader = await self._redis_client.get(self._leader_key) |
4038 | | - if current_leader != self._instance_id: |
| 4052 | + current_leader_raw = await self._redis_client.get(self._leader_key) |
| 4053 | + if current_leader_raw: |
| 4054 | + try: |
| 4055 | + current_leader_data = json.loads(current_leader_raw) |
| 4056 | + current_leader_id = current_leader_data.get("instance_id") |
| 4057 | + except (json.JSONDecodeError, AttributeError): |
| 4058 | + # Fallback for old UUID-only format |
| 4059 | + current_leader_id = current_leader_raw |
| 4060 | + else: |
| 4061 | + current_leader_id = None |
| 4062 | + |
| 4063 | + if current_leader_id != self._instance_id: |
4039 | 4064 | return |
4040 | 4065 |
|
4041 | 4066 | # Run health checks |
@@ -4087,6 +4112,67 @@ async def _run_health_checks(self, user_email: str) -> None: |
4087 | 4112 | logger.error(f"Unexpected error in health check loop: {str(e)}") |
4088 | 4113 | await asyncio.sleep(self._health_check_interval) |
4089 | 4114 |
|
| 4115 | + def is_leader_sync(self) -> bool: |
| 4116 | + """Check if this instance is the current leader (synchronous version for health checks). |
| 4117 | +
|
| 4118 | + Returns: |
| 4119 | + bool: True if this instance holds the leader lock, False otherwise. |
| 4120 | + """ |
| 4121 | + if not self._redis_client or not hasattr(self, "_leader_key"): |
| 4122 | + # Fallback to file lock for non-Redis setups |
| 4123 | + return True |
| 4124 | + |
| 4125 | + try: |
| 4126 | + # Use sync Redis client method if available |
| 4127 | + import redis |
| 4128 | + if isinstance(self._redis_client, redis.asyncio.Redis): |
| 4129 | + # For async Redis client, we can't do sync call - return True as fallback |
| 4130 | + return True |
| 4131 | + |
| 4132 | + # For sync Redis client (shouldn't happen in current setup, but safe fallback) |
| 4133 | + current_leader_raw = self._redis_client.get(self._leader_key) |
| 4134 | + if not current_leader_raw: |
| 4135 | + return False |
| 4136 | + |
| 4137 | + try: |
| 4138 | + current_leader_data = json.loads(current_leader_raw) |
| 4139 | + current_leader_id = current_leader_data.get("instance_id") |
| 4140 | + except (json.JSONDecodeError, AttributeError): |
| 4141 | + # Fallback for old UUID-only format |
| 4142 | + current_leader_id = current_leader_raw |
| 4143 | + |
| 4144 | + return current_leader_id == self._instance_id |
| 4145 | + except Exception as e: |
| 4146 | + logger.warning(f"Error checking leader status: {e}") |
| 4147 | + return True # Fail open for health checks |
| 4148 | + |
| 4149 | + async def is_leader(self) -> bool: |
| 4150 | + """Check if this instance is the current leader (async version). |
| 4151 | +
|
| 4152 | + Returns: |
| 4153 | + bool: True if this instance holds the leader lock, False otherwise. |
| 4154 | + """ |
| 4155 | + if not self._redis_client or not hasattr(self, "_leader_key"): |
| 4156 | + # Fallback to file lock for non-Redis setups |
| 4157 | + return True |
| 4158 | + |
| 4159 | + try: |
| 4160 | + current_leader_raw = await self._redis_client.get(self._leader_key) |
| 4161 | + if not current_leader_raw: |
| 4162 | + return False |
| 4163 | + |
| 4164 | + try: |
| 4165 | + current_leader_data = json.loads(current_leader_raw) |
| 4166 | + current_leader_id = current_leader_data.get("instance_id") |
| 4167 | + except (json.JSONDecodeError, AttributeError): |
| 4168 | + # Fallback for old UUID-only format |
| 4169 | + current_leader_id = current_leader_raw |
| 4170 | + |
| 4171 | + return current_leader_id == self._instance_id |
| 4172 | + except Exception as e: |
| 4173 | + logger.warning(f"Error checking leader status: {e}") |
| 4174 | + return False |
| 4175 | + |
4090 | 4176 | def _get_auth_headers(self) -> Dict[str, str]: |
4091 | 4177 | """Get default headers for gateway requests (no authentication). |
4092 | 4178 |
|
|
0 commit comments