Skip to content

Commit dab8261

Browse files
authored
[QC-1038] fix Race condition in Service Discovery (#2024)
* [QC-1038] fix Race condition in Service Discovery * format * add missing include * add missing include
1 parent 85f30bc commit dab8261

2 files changed

Lines changed: 20 additions & 3 deletions

File tree

Framework/include/QualityControl/ServiceDiscovery.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
#include <memory>
2121
#include <string>
2222
#include <thread>
23+
#include <mutex>
24+
#include <condition_variable>
2325

2426
typedef void CURL;
2527

@@ -65,6 +67,9 @@ class ServiceDiscovery
6567
const std::string mId; ///< Instance (service) ID
6668
std::string mHealthUrl; ///< hostname of health check endpoint
6769
size_t mHealthPort; ///< Port of the health check endpoint
70+
std::mutex mHealthPortMutex; ///< Mutex for the port
71+
std::condition_variable mHealthPortCV; ///< Condition varaible for the port
72+
std::atomic<bool> mHealthPortAssigned; ///< Port of the health check is ready.
6873
std::thread mHealthThread; ///< Health check thread
6974
std::atomic<bool> mThreadRunning; ///< Health check thread running flag
7075

Framework/src/ServiceDiscovery.cxx

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ ServiceDiscovery::ServiceDiscovery(const std::string& url, const std::string& na
3838
: curlHandle(initCurl(), &ServiceDiscovery::deleteCurl), mConsulUrl(url), mName(name), mId(id), mHealthUrl(healthEndUrl)
3939
{
4040
mHealthUrl = mHealthUrl.empty() ? boost::asio::ip::host_name() : mHealthUrl;
41+
mHealthPortAssigned = false;
4142

4243
mHealthThread = std::thread([=] {
4344
#ifdef __linux__
@@ -96,6 +97,11 @@ bool ServiceDiscovery::_register(const std::string& objects)
9697
check.put("Name", "Health check " + mId);
9798
check.put("Interval", "5s");
9899
check.put("DeregisterCriticalServiceAfter", "1m");
100+
// Wait until port is set by the thread
101+
{
102+
std::unique_lock<std::mutex> lk(mHealthPortMutex);
103+
mHealthPortCV.wait(lk, [this] { return mHealthPortAssigned == true; });
104+
}
99105
check.put("TCP", mHealthUrl + ":" + std::to_string(mHealthPort));
100106
checks.push_back(std::make_pair("", check));
101107

@@ -151,12 +157,18 @@ void ServiceDiscovery::runHealthServer()
151157
ILOG(Debug, Trace) << "ServiceDiscovery::runHealthServer - cound not bind to " << port << ENDM;
152158
continue; // try the next one
153159
}
154-
// if we reach this point it means that we could bind
155-
mHealthPort = port;
156-
ILOG(Debug, Devel) << "ServiceDiscovery selected port: " << mHealthPort << ENDM;
157160
break;
158161
}
159162

163+
// assign the port and unblock the main thread (we got a port or we failed)
164+
{
165+
ILOG(Debug, Devel) << "ServiceDiscovery selected port: " << mHealthPort << ENDM;
166+
std::lock_guard<std::mutex> lk(mHealthPortMutex);
167+
mHealthPort = port;
168+
mHealthPortAssigned = true;
169+
}
170+
mHealthPortCV.notify_one();
171+
160172
if (cycle == rangeLength) {
161173
ILOG(Error, Support) << "Could not find a free port for the ServiceDiscovery, aborting the ServiceDiscovery health check" << ENDM;
162174
return;

0 commit comments

Comments
 (0)