Skip to content

Commit 9d89508

Browse files
authored
Start serial registration in early daemon initialization (#648)
On some devices (e.g., Xiaomi Android 12 [Xiaomi/toco_ru/toco:12/RKQ1.210614.002/V13.0.4.0.SFNRUXM:user/release-keys]), the daemon takes longer to initialize, causing the Zygisk module in system_server to time out while waiting for the proxy service: by default it gives up after 3 lookup attempts made one second apart. We address the race condition with the following changes:

- Move proxy service registration (`SystemServerService.registerProxyService`) to the earliest possible phase in daemon initialization, before environment setup and DEX preloading.
- Increase the Zygisk IPC polling in `ipc_bridge.cpp` from 3 to 10 attempts (one per second, i.e. up to 10 seconds of waiting) to ensure successful communication on slower ROMs.
- Refactor `SystemServerService` into a singleton object.
- Move `system_server` crash recovery and restart logic directly into `VectorDaemon.kt` to better manage retry states during reinjection.
- Clean up variable usage and improve code comments to accurately reflect the proxy replacement lifecycle.
1 parent e811522 commit 9d89508

File tree

3 files changed

+68
-67
lines changed

3 files changed

+68
-67
lines changed

daemon/src/main/kotlin/org/matrix/vector/daemon/VectorDaemon.kt

Lines changed: 41 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@ import android.os.Looper
1111
import android.os.Parcel
1212
import android.os.Process
1313
import android.os.ServiceManager
14+
import android.os.SystemProperties
1415
import android.system.Os
1516
import android.util.Log
1617
import kotlinx.coroutines.CoroutineExceptionHandler
@@ -41,6 +42,7 @@ object VectorDaemon {
4142
// Dispatchers.IO: Uses the shared background thread pool.
4243
// SupervisorJob(): Ensures one failing task doesn't kill the whole daemon.
4344
val scope = CoroutineScope(Dispatchers.IO + SupervisorJob() + exceptionHandler)
45+
val bridgeServiceName = "activity"
4446

4547
var isLateInject = false
4648
var proxyServiceName = "serial"
@@ -67,6 +69,14 @@ object VectorDaemon {
6769
kotlin.system.exitProcess(1)
6870
}
6971

72+
// Setup Main Looper
73+
Process.setThreadPriority(Process.THREAD_PRIORITY_FOREGROUND)
74+
@Suppress("DEPRECATION") Looper.prepareMainLooper()
75+
76+
// Squat on the proxy service name immediately, which creates the early IPC channel of
77+
// ApplicationService for our Zygisk module during system_server specialization.
78+
SystemServerService.registerProxyService(proxyServiceName)
79+
7080
// Start Environmental Daemons
7181
LogcatMonitor.start()
7282
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.Q) Dex2OatServer.start()
@@ -75,27 +85,20 @@ object VectorDaemon {
7585
// Preload Framework DEX in the background
7686
scope.launch { FileSystem.getPreloadDex(ConfigCache.state.isDexObfuscateEnabled) }
7787

78-
// Setup Main Looper & System Services
79-
Process.setThreadPriority(Process.THREAD_PRIORITY_FOREGROUND)
80-
@Suppress("DEPRECATION") Looper.prepareMainLooper()
81-
82-
val systemServerService = SystemServerService(systemServerMaxRetry, proxyServiceName)
83-
systemServerService.putBinderForSystemServer()
84-
8588
// Initializes system frameworks inside the daemon process
8689
ActivityThread.systemMain()
8790
DdmHandleAppName.setAppName("org.matrix.vector.daemon", 0)
8891

89-
// Wait for Android Core Services
92+
// Wait for Android core services
9093
waitForSystemService("package")
91-
waitForSystemService("activity")
94+
waitForSystemService("activity") // current bridgeServiceName
9295
waitForSystemService(Context.USER_SERVICE)
9396
waitForSystemService(Context.APP_OPS_SERVICE)
9497

9598
applyNotificationWorkaround()
9699

97-
// Inject Vector into system_server
98-
sendToBridge(VectorService.asBinder(), isRestart = false, systemServerService)
100+
// Setup IPC channel for applications by injecting DaemonService binder
101+
sendToBridge(VectorService.asBinder(), false, systemServerMaxRetry)
99102

100103
if (!ManagerService.isVerboseLog()) {
101104
LogcatMonitor.stopVerbose()
@@ -112,11 +115,12 @@ object VectorDaemon {
112115
}
113116
}
114117

118+
// The bridge is set up in `system_server` via the Zygisk API
115119
@Suppress("DEPRECATION")
116120
private fun sendToBridge(
117121
binder: IBinder,
118122
isRestart: Boolean,
119-
systemServerService: SystemServerService
123+
restartRetry: Int,
120124
) {
121125
check(Looper.myLooper() == Looper.getMainLooper()) {
122126
"sendToBridge MUST run on the main thread!"
@@ -126,12 +130,12 @@ object VectorDaemon {
126130

127131
runCatching {
128132
var bridgeService: IBinder?
129-
if (isRestart) Log.w(TAG, "System Server restarted...")
133+
if (isRestart) Log.w(TAG, "system_server restarted...")
130134

131135
while (true) {
132-
bridgeService = ServiceManager.getService("activity")
136+
bridgeService = ServiceManager.getService(bridgeServiceName)
133137
if (bridgeService?.pingBinder() == true) break
134-
Log.i(TAG, "activity service not ready, waiting 1s...")
138+
Log.i(TAG, "`$bridgeServiceName` service not ready, waiting 1s...")
135139
Thread.sleep(1000)
136140
}
137141

@@ -142,10 +146,13 @@ object VectorDaemon {
142146
Log.w(TAG, "System Server died! Clearing caches and re-injecting...")
143147
bridgeService.unlinkToDeath(this, 0)
144148
clearSystemCaches()
145-
systemServerService.putBinderForSystemServer()
149+
SystemServerService.binderDied() // Cleanup old references
150+
// Re-claim the service name immediately to ensure that when system_server
151+
// restarts, our proxy is already there for the Zygisk module to find.
152+
ServiceManager.addService(proxyServiceName, SystemServerService)
146153
ManagerService.guard = null // Remove dead guard
147154
Handler(Looper.getMainLooper()).post {
148-
sendToBridge(binder, isRestart = true, systemServerService)
155+
sendToBridge(binder, true, restartRetry - 1)
149156
}
150157
}
151158
}
@@ -170,13 +177,14 @@ object VectorDaemon {
170177
Thread.sleep(1000)
171178
}
172179

173-
if (success) Log.i(TAG, "Successfully injected Vector into system_server")
174-
else {
175-
Log.e(TAG, "Failed to inject Vector into system_server")
176-
systemServerService.maybeRetryInject()
180+
if (success) {
181+
Log.i(TAG, "Successfully injected Vector IPC binder for applications.")
182+
} else {
183+
Log.e(TAG, "Failed to inject VectorService into system_server")
184+
if (restartRetry > 0) restartSystemServer()
177185
}
178186
}
179-
.onFailure { Log.e(TAG, "Error during System Server bridging", it) }
187+
.onFailure { Log.e(TAG, "Error during injecting DaemonService", it) }
180188
Os.seteuid(1000)
181189
}
182190

@@ -209,4 +217,15 @@ object VectorDaemon {
209217
}
210218
.onFailure { Log.w(TAG, "Failed to clear system caches via reflection", it) }
211219
}
220+
221+
fun restartSystemServer() {
222+
Log.w(TAG, "Restarting system_server...")
223+
val restartTarget =
224+
if (Build.SUPPORTED_64_BIT_ABIS.isNotEmpty() && Build.SUPPORTED_32_BIT_ABIS.isNotEmpty()) {
225+
"zygote_secondary"
226+
} else {
227+
"zygote"
228+
}
229+
SystemProperties.set("ctl.restart", restartTarget)
230+
}
212231
}

daemon/src/main/kotlin/org/matrix/vector/daemon/ipc/SystemServerService.kt

Lines changed: 18 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@ import android.os.IBinder
55
import android.os.IServiceCallback
66
import android.os.Parcel
77
import android.os.ServiceManager
8-
import android.os.SystemProperties
98
import android.util.Log
109
import org.lsposed.lspd.service.ILSPApplicationService
1110
import org.lsposed.lspd.service.ILSPSystemServerService
@@ -14,44 +13,41 @@ import org.matrix.vector.daemon.system.getSystemServiceManager
1413

1514
private const val TAG = "VectorSystemServer"
1615

17-
class SystemServerService(private val maxRetry: Int, private val proxyServiceName: String) :
18-
ILSPSystemServerService.Stub(), IBinder.DeathRecipient {
16+
object SystemServerService : ILSPSystemServerService.Stub(), IBinder.DeathRecipient {
1917

18+
private var proxyServiceName: String? = null
2019
private var originService: IBinder? = null
21-
private var requestedRetryCount = -maxRetry
2220

23-
companion object {
24-
var systemServerRequested = false
25-
}
21+
var systemServerRequested = false
2622

27-
init {
28-
Log.d(TAG, "registering via proxy $proxyServiceName")
23+
fun registerProxyService(serviceName: String) {
24+
// Register as the service name early to setup an IPC for `system_server`.
25+
Log.d(TAG, "Registering bridge service for `system_server` with name `$serviceName`.")
2926

3027
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.R) {
3128
val callback =
3229
object : IServiceCallback.Stub() {
30+
// The IServiceCallback will tell us when the real Android service is ready,
31+
// allowing us to capture it and then naturally stop intercepting traffic.
3332
override fun onRegistration(name: String, binder: IBinder?) {
34-
if (name == proxyServiceName &&
35-
binder != null &&
36-
binder !== this@SystemServerService) {
37-
Log.d(TAG, "Intercepted system service registration: $name")
33+
if (name == serviceName && binder != null && binder !== this@SystemServerService) {
34+
Log.d(TAG, "Intercepted system service registration with name `$name`")
3835
originService = binder
3936
runCatching { binder.linkToDeath(this@SystemServerService, 0) }
4037
}
4138
}
4239

4340
override fun asBinder(): IBinder = this
4441
}
45-
runCatching { getSystemServiceManager().registerForNotifications(proxyServiceName, callback) }
42+
runCatching {
43+
getSystemServiceManager().registerForNotifications(serviceName, callback)
44+
ServiceManager.addService(serviceName, this)
45+
proxyServiceName = serviceName
46+
}
4647
.onFailure { Log.e(TAG, "Failed to register IServiceCallback", it) }
4748
}
4849
}
4950

50-
fun putBinderForSystemServer() {
51-
ServiceManager.addService(proxyServiceName, this)
52-
binderDied()
53-
}
54-
5551
override fun requestApplicationService(
5652
uid: Int,
5753
pid: Int,
@@ -69,8 +65,9 @@ class SystemServerService(private val maxRetry: Int, private val proxyServiceNam
6965

7066
override fun onTransact(code: Int, data: Parcel, reply: Parcel?, flags: Int): Boolean {
7167
originService?.let {
72-
// This should however never happen, as service registration enforces later replacements
73-
Log.i(TAG, "Original service $proxyServiceName alive, transmitting requests")
68+
// This is unlikely to happen unless system_server restarts / crashes, since we intentionally
69+
// discard our proxy upon later replacements in registerProxyService.
70+
Log.d(TAG, "Forwarding request to real `$proxyServiceName` service.")
7471
return it.transact(code, data, reply, flags)
7572
}
7673

@@ -103,19 +100,4 @@ class SystemServerService(private val maxRetry: Int, private val proxyServiceNam
103100
originService?.unlinkToDeath(this, 0)
104101
originService = null
105102
}
106-
107-
fun maybeRetryInject() {
108-
if (requestedRetryCount < 0) {
109-
Log.w(TAG, "System server injection fails, triggering restart...")
110-
requestedRetryCount++
111-
val restartTarget =
112-
if (Build.SUPPORTED_64_BIT_ABIS.isNotEmpty() &&
113-
Build.SUPPORTED_32_BIT_ABIS.isNotEmpty()) {
114-
"zygote_secondary"
115-
} else {
116-
"zygote"
117-
}
118-
SystemProperties.set("ctl.restart", restartTarget)
119-
}
120-
}
121103
}

zygisk/src/main/cpp/ipc_bridge.cpp

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -307,23 +307,23 @@ lsplant::ScopedLocalRef<jobject> IPCBridge::RequestSystemServerBinder(
307307
auto service_name = lsplant::ScopedLocalRef(env, env->NewStringUTF(bridgeServiceName.data()));
308308
lsplant::ScopedLocalRef<jobject> binder = {env, nullptr};
309309

310-
// The system_server might start its services slightly after Zygisk injects us.
311-
// We retry a few times to give it a chance to register.
312-
for (int i = 0; i < 3; ++i) {
310+
// The daemon process and system_server specialization run in parallel.
311+
// On slower devices, the daemon may take several seconds to call addService.
312+
// We poll for up to 10 seconds to ensure the early IPC channel for system_server is available.
313+
const int max_retry = 10;
314+
for (int i = 0; i < max_retry; ++i) {
313315
binder = lsplant::JNI_CallStaticObjectMethod(env, service_manager_class_,
314316
get_service_method_, service_name.get());
315317
if (binder) {
316318
LOGI("Got system server binder via {} on attempt {}.", bridgeServiceName.data(), i + 1);
317319
return binder;
318320
}
319-
if (i < 2) {
320-
LOGW("Failed to get system server binder via {}, will retry in 1 second...",
321-
bridgeServiceName.data());
322-
std::this_thread::sleep_for(std::chrono::seconds(1));
323-
}
321+
LOGW("Failed to get system server binder via {}, will retry in 1 second...",
322+
bridgeServiceName.data());
323+
std::this_thread::sleep_for(std::chrono::seconds(1));
324324
}
325325

326-
LOGE("Failed to get system server binder after 3 attempts. Aborting.");
326+
LOGE("Failed to get system server binder after {} attempts. Aborting.", max_retry);
327327
return {env, nullptr};
328328
}
329329

0 commit comments

Comments
 (0)