Skip to content

Commit 808da6a

Browse files
committed
feat: update subnet configuration and enhance process management
- Changed subnet ID and updated parent registry and gateway addresses in `ipc-subnet-config.yml` for improved network configuration.
- Enhanced `exec.sh` to check process status more reliably by capturing output and trimming whitespace.
- Improved `health.sh` to ensure proper cleanup of stale RocksDB locks and added fallback mechanisms for stopping processes, enhancing robustness during node shutdown.
- Added validation for extracted subnet ID against on-chain subnet list to ensure consistency and correctness in subnet management.
1 parent 49b5918 commit 808da6a

3 files changed

Lines changed: 54 additions & 6 deletions

File tree

scripts/ipc-subnet-manager/ipc-subnet-config.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,17 @@
44
# Subnet Configuration
55
subnet:
66
# Subnet ID - get this from your subnet creation
7-
id: "/r314159/t410fdg2vwrukcxeopg3sfslrmsf2fkxjyzfldcnf7kq"
7+
id: "/r314159/t410fko243732vbebcpg32q6lbcnhmmqlkfyvu36ibdi"
88
# Parent chain RPC endpoint
99
parent_rpc: "https://api.calibration.node.glif.io/rpc/v1"
1010
#parent_rpc: "http://localhost:8555"
1111

1212
# Parent chain ID
1313
parent_chain_id: "/r314159"
1414
# Parent registry contract address
15-
parent_registry: "0x3527b860bfe033601b5cfda1dbaaaeb0e81033be"
15+
parent_registry: "0x618dc98241aa36710d83de2927eeb50ccd1c853d"
1616
# Parent gateway contract address
17-
parent_gateway: "0x46eb516b535b990825a3171a28548af072975296"
17+
parent_gateway: "0x9e78a3c81023b8f4edb16f2975893c76b3265a82"
1818
# Validator Nodes (GCP hosts: ipc-chain-test-1, ipc-chain-test-2, ipc-chain-test-3)
1919
# ip: external IP (for SSH, RPC). internal_ip: for CometBFT/libp2p peer connections (same VPC)
2020
validators:
@@ -129,8 +129,8 @@ ipc_cli:
129129
network_type: "fevm"
130130
provider_http: "https://api.calibration.node.glif.io/rpc/v1"
131131
#provider_http: "http://localhost:8555"
132-
registry_addr: "0x3527b860bfe033601b5cfda1dbaaaeb0e81033be"
133-
gateway_addr: "0x46eb516b535b990825a3171a28548af072975296"
132+
registry_addr: "0x618dc98241aa36710d83de2927eeb50ccd1c853d"
133+
gateway_addr: "0x9e78a3c81023b8f4edb16f2975893c76b3265a82"
134134
# Child subnet configuration (this subnet)
135135
child:
136136
# Uses subnet.id from above

scripts/ipc-subnet-manager/lib/exec.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,9 @@ check_process_running() {
155155
local ip=$(get_config_value "validators[$validator_idx].ip")
156156
local ssh_user=$(get_config_value "validators[$validator_idx].ssh_user")
157157
local ipc_user=$(get_config_value "validators[$validator_idx].ipc_user")
158-
ssh_check_process "$ip" "$ssh_user" "$ipc_user" "$process_pattern"
158+
local status
159+
status=$(ssh_check_process "$ip" "$ssh_user" "$ipc_user" "$process_pattern" 2>/dev/null | tr -d '\r\n ')
160+
[ "$status" = "running" ]
159161
fi
160162
}
161163

scripts/ipc-subnet-manager/lib/health.sh

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,18 @@ stop_all_nodes() {
3434
log_info "Stopping $name..."
3535
kill_process "$idx" "ipc-cli node start"
3636
# Also stop child processes that can keep RocksDB LOCK files open.
37+
kill_process "$idx" "fendermint"
3738
kill_process "$idx" "/target/release/fendermint"
3839
kill_process "$idx" "fendermint/app/src/service/node.rs"
3940
kill_process "$idx" "cometbft start"
41+
kill_process "$idx" "cometbft"
4042
kill_process "$idx" "/cometbft"
4143

44+
# Fallback non-sudo process kill when sudo -n is unavailable.
45+
exec_on_host_simple "$idx" "pkill -f \"ipc-cli node start\" >/dev/null 2>&1 || true" || true
46+
exec_on_host_simple "$idx" "pkill -f \"fendermint\" >/dev/null 2>&1 || true" || true
47+
exec_on_host_simple "$idx" "pkill -f \"cometbft\" >/dev/null 2>&1 || true" || true
48+
4249
# Best-effort systemd stop (some hosts run ipc-node service directly).
4350
if ! is_local_mode; then
4451
local ip
@@ -51,9 +58,30 @@ stop_all_nodes() {
5158

5259
# Wait a moment for graceful shutdown
5360
sleep 2
61+
62+
# Stale LOCK can remain when prior process exits uncleanly.
63+
cleanup_stale_rocksdb_lock "$idx"
5464
done
5565
}
5666

67+
cleanup_stale_rocksdb_lock() {
    # Remove a leftover RocksDB LOCK file for one validator, but only when
    # no node process is found on that host.
    #
    # Arguments: $1 - validator index (key into VALIDATORS)
    # Globals:   VALIDATORS (read, for the display name)
    # Outputs:   a log_warn line when a lock file was removed
    # Returns:   0 on every path below (a failed check or rm is best-effort)
    local validator_idx="$1"
    local name="${VALIDATORS[$validator_idx]}"
    local node_home
    node_home=$(get_node_home "$validator_idx")
    local lock_path="$node_home/fendermint/data/rocksdb/LOCK"

    # The first character of each alternative is bracketed so the regex still
    # matches the real processes but not its own text. `pgrep -f` matches full
    # command lines, so any wrapper (sh -c / su -c / sudo) that carries this
    # command string would otherwise count as "still running" and the stale
    # lock would never be cleaned up.
    local node_procs='[f]endermint|[c]ometbft|[i]pc-cli node start'

    # A live node process may legitimately hold the lock: leave it alone.
    if exec_on_host_simple "$validator_idx" "pgrep -f \"$node_procs\" >/dev/null 2>&1"; then
        return 0
    fi

    # NOTE(review): lock_path is interpolated unquoted into the command string,
    # as before. Quoting it would suppress any expansion (e.g. a leading ~)
    # that node home values may rely on; confirm against get_node_home.
    if exec_on_host_simple "$validator_idx" "[ -f $lock_path ]"; then
        if exec_on_host_simple "$validator_idx" "rm -f $lock_path"; then
            log_warn "Removed stale RocksDB lock on $name: $lock_path"
        fi
    fi
}
84+
5785
stop_all_storage_nodes() {
5886
for idx in "${!VALIDATORS[@]}"; do
5987
local name="${VALIDATORS[$idx]}"
@@ -90,6 +118,7 @@ start_validator_node() {
90118
local subnet_id=$(get_config_value "subnet.id")
91119

92120
log_info "Starting $name..."
121+
cleanup_stale_rocksdb_lock "$validator_idx"
93122

94123
# Use wrapper script to set env vars reliably (avoids SSH quoting issues with sudo su -c '...').
95124
# resolver_enabled() requires: !listen_addr.is_empty() && subnet_id != UNDEF
@@ -666,6 +695,23 @@ EOF
666695
}
667696
' "$ipc_config_file")
668697

698+
# Validate extracted subnet ID against on-chain subnet list.
699+
# If TOML extraction picked a stale local entry, prefer the latest on-chain child subnet.
700+
local onchain_subnets
701+
onchain_subnets=$($ipc_binary_expanded subnet list --parent "$parent_chain_id" 2>/dev/null | awk -v parent="$parent_chain_id" '
702+
$1 ~ ("^" parent "/t[a-z0-9]+$") { print $1 }
703+
')
704+
if [ -n "$onchain_subnets" ]; then
705+
if [ -z "$subnet_id" ] || ! echo "$onchain_subnets" | grep -Fxq "$subnet_id"; then
706+
local onchain_latest
707+
onchain_latest=$(echo "$onchain_subnets" | tail -1)
708+
if [ -n "$onchain_latest" ]; then
709+
log_warn "Extracted subnet ID was not on-chain; using latest on-chain subnet: $onchain_latest" >&2
710+
subnet_id="$onchain_latest"
711+
fi
712+
fi
713+
fi
714+
669715
if [ -z "$subnet_id" ]; then
670716
log_error "Could not extract subnet ID from IPC config at $ipc_config_file" >&2
671717
log_info "Full CLI output:" >&2

0 commit comments

Comments
 (0)