Skip to content

Commit 8b3f609

Browse files
committed
fix: expand contact point hostnames to all DNS IPs at connection time (DRIVER-201)
Addresses the initial-contact-endpoints aspect of DRIVER-201.

Problem: with RESOLVE_CONTACT_POINTS=false (the default), a contact point hostname was stored as a single unresolved InetSocketAddress. At connection time the load-balancing query plan contained exactly one Node per hostname, so only the first IP returned by DNS was ever tried. If that IP was non-responsive, the driver raised AllNodesFailedException with no fallback to the other IPs the hostname might resolve to.

Solution (per @dkropachev's architectural direction):
- Deprecate DefaultDriverOption.RESOLVE_CONTACT_POINTS. Contact points are now always kept as unresolved hostnames (resolve=false is hardcoded in SessionBuilder), deferring DNS expansion to connection time.
- Add MetadataManager.getResolvedContactPoints(): for each contact point backed by an unresolved hostname, it calls InetAddress.getAllByName() to expand the hostname to all known IPs, creating a synthetic DefaultNode for each IP. Already-resolved addresses and endpoints that are not a DefaultEndPoint pass through unchanged; a hostname that cannot be resolved is logged and skipped.
- LoadBalancingPolicyWrapper now calls getResolvedContactPoints() instead of getContactPoints() in newQueryPlan() (BEFORE_INIT/DURING_INIT states) and newControlReconnectionQueryPlan(), so the query plan contains one node per resolved IP and the driver naturally falls back to the next IP when one is unreachable.

Tests:
- 4 new MetadataManagerTest cases covering null state, already-resolved passthrough, single-hostname expansion, and multi-endpoint expansion.
- LoadBalancingPolicyWrapperTest updated to stub getResolvedContactPoints().
- New MockResolverIT.should_connect_when_first_dns_entry_is_non_responsive integration test: the first DNS entry is a non-existent IP, and the session must open successfully against the remaining real IPs.
1 parent 9149592 commit 8b3f609

8 files changed

Lines changed: 180 additions & 15 deletions

File tree

core/src/main/java/com/datastax/oss/driver/api/core/config/DefaultDriverOption.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -837,7 +837,11 @@ public enum DefaultDriverOption implements DriverOption {
837837
* Whether to resolve the addresses passed to `basic.contact-points`.
838838
*
839839
* <p>Value-type: boolean
840+
*
841+
* @deprecated Contact points are now always kept as unresolved hostnames and expanded to all
842+
* their DNS-mapped IPs lazily at connection time. Setting this option has no effect.
840843
*/
844+
@Deprecated
841845
RESOLVE_CONTACT_POINTS("advanced.resolve-contact-points"),
842846

843847
/**

core/src/main/java/com/datastax/oss/driver/api/core/config/TypedDriverOption.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,13 @@ public String toString() {
656656
/** The coalescer reschedule interval. */
657657
public static final TypedDriverOption<Duration> COALESCER_INTERVAL =
658658
new TypedDriverOption<>(DefaultDriverOption.COALESCER_INTERVAL, GenericType.DURATION);
659-
/** Whether to resolve the addresses passed to `basic.contact-points`. */
659+
/**
660+
* Whether to resolve the addresses passed to `basic.contact-points`.
661+
*
662+
* @deprecated Contact points are now always kept as unresolved hostnames and expanded to all
663+
* their DNS-mapped IPs lazily at connection time. Setting this option has no effect.
664+
*/
665+
@Deprecated
660666
public static final TypedDriverOption<Boolean> RESOLVE_CONTACT_POINTS =
661667
new TypedDriverOption<>(DefaultDriverOption.RESOLVE_CONTACT_POINTS, GenericType.BOOLEAN);
662668
/**

core/src/main/java/com/datastax/oss/driver/api/core/session/SessionBuilder.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -935,11 +935,10 @@ protected final CompletionStage<CqlSession> buildDefaultSessionAsync() {
935935
programmaticArguments = programmaticArgumentsBuilder.build();
936936
}
937937

938-
boolean resolveAddresses =
939-
defaultConfig.getBoolean(DefaultDriverOption.RESOLVE_CONTACT_POINTS, false);
940-
938+
// RESOLVE_CONTACT_POINTS is deprecated: contact points are always kept as unresolved
939+
// hostnames and expanded to all their DNS IPs lazily at connection time.
941940
Set<EndPoint> contactPoints =
942-
ContactPoints.merge(programmaticContactPoints, configContactPoints, resolveAddresses);
941+
ContactPoints.merge(programmaticContactPoints, configContactPoints, false);
943942

944943
if (keyspace == null && defaultConfig.isDefined(DefaultDriverOption.SESSION_KEYSPACE)) {
945944
keyspace =

core/src/main/java/com/datastax/oss/driver/internal/core/metadata/LoadBalancingPolicyWrapper.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,9 @@ public Queue<Node> newQueryPlan(
147147
switch (stateRef.get()) {
148148
case BEFORE_INIT:
149149
case DURING_INIT:
150-
// The contact points are not stored in the metadata yet:
151-
List<Node> nodes = new ArrayList<>(context.getMetadataManager().getContactPoints());
150+
// The contact points are not stored in the metadata yet; use the expanded list so that
151+
// all DNS-mapped IPs are tried (not just the first one per hostname).
152+
List<Node> nodes = new ArrayList<>(context.getMetadataManager().getResolvedContactPoints());
152153
Collections.shuffle(nodes);
153154
return new ConcurrentLinkedQueue<>(nodes);
154155
case RUNNING:
@@ -170,11 +171,10 @@ public Queue<Node> newControlReconnectionQueryPlan() {
170171
.getConfig()
171172
.getDefaultProfile()
172173
.getBoolean(DefaultDriverOption.CONTROL_CONNECTION_RECONNECT_CONTACT_POINTS)) {
173-
Set<DefaultNode> originalNodes = context.getMetadataManager().getContactPoints();
174-
List<Node> contactNodes = new ArrayList<>();
175-
for (DefaultNode node : originalNodes) {
176-
contactNodes.add(DefaultNode.newContactPoint(node.getEndPoint(), context));
177-
}
174+
// Use the DNS-expanded contact points so that all IPs for each hostname are appended as
175+
// fallback candidates, not just the one that was stored when the session was built.
176+
List<Node> contactNodes =
177+
new ArrayList<>(context.getMetadataManager().getResolvedContactPoints());
178178
Collections.shuffle(contactNodes);
179179
// Append contact points to the end of the regular query plan so they serve as a fallback
180180
regularQueryPlan.addAll(contactNodes);

core/src/main/java/com/datastax/oss/driver/internal/core/metadata/MetadataManager.java

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,11 @@
4949
import com.datastax.oss.driver.shaded.guava.common.collect.ImmutableSet;
5050
import edu.umd.cs.findbugs.annotations.NonNull;
5151
import io.netty.util.concurrent.EventExecutor;
52+
import java.net.InetAddress;
5253
import java.net.InetSocketAddress;
54+
import java.net.UnknownHostException;
5355
import java.nio.ByteBuffer;
56+
import java.util.ArrayList;
5457
import java.util.Collections;
5558
import java.util.HashMap;
5659
import java.util.List;
@@ -173,6 +176,59 @@ public Set<DefaultNode> getContactPoints() {
173176
return contactPoints;
174177
}
175178

179+
/**
180+
* Returns the contact points expanded to all their DNS-resolved IPs.
181+
*
182+
* <p>For each contact point whose endpoint is a {@link DefaultEndPoint} backed by an unresolved
183+
* hostname (i.e. an address created with {@code InetSocketAddress.createUnresolved(...)}),
184+
* this method calls {@link InetAddress#getAllByName(String)} to obtain every IP the hostname maps
185+
* to and creates a synthetic contact-point {@link DefaultNode} for each IP. This lets the load
186+
* balancing policy iterate over all candidate IPs rather than only the first one, so that a
187+
* non-responsive IP does not block initial connection or control-connection reconnection.
188+
*
189+
* <p>Already-resolved addresses and endpoints that are not a {@link DefaultEndPoint} are returned
* as-is (the original node instance is reused). A hostname that cannot be resolved is logged at
* WARN level and contributes no node to the result.
*
* <p>Every call builds a new list, repeats the lookup for each unresolved hostname and creates
* new synthetic nodes; results are not de-duplicated across contact points.
*
* <p>NOTE(review): {@link InetAddress#getAllByName(String)} may perform a blocking name-service
* lookup — confirm that every caller is allowed to block on the thread it runs on.
*
* @return a new mutable list of nodes; empty if the contact points have not been set yet
190+
*/
191+
public List<Node> getResolvedContactPoints() {
192+
// Read the field once so the null check and the loop below use the same reference.
Set<DefaultNode> nodes = contactPoints;
193+
if (nodes == null) {
194+
return new ArrayList<>();
195+
}
196+
List<Node> result = new ArrayList<>();
197+
for (DefaultNode node : nodes) {
198+
EndPoint endPoint = node.getEndPoint();
199+
if (endPoint instanceof DefaultEndPoint) {
200+
InetSocketAddress address = ((DefaultEndPoint) endPoint).resolve();
201+
if (address.isUnresolved()) {
202+
// Expand hostname to all IPs so callers can try each one in turn.
203+
try {
204+
InetAddress[] all = InetAddress.getAllByName(address.getHostString());
205+
// Only worth a log line when the hostname actually fans out to several addresses.
if (all.length > 1) {
206+
LOG.debug(
207+
"[{}] Contact point {} expands to {} addresses",
208+
logPrefix,
209+
address.getHostString(),
210+
all.length);
211+
}
212+
for (InetAddress ip : all) {
213+
// Keep the port of the original contact point for every expanded address.
InetSocketAddress resolved = new InetSocketAddress(ip, address.getPort());
214+
result.add(DefaultNode.newContactPoint(new DefaultEndPoint(resolved), context));
215+
}
216+
} catch (UnknownHostException e) {
217+
LOG.warn(
218+
"[{}] Could not resolve contact point hostname {}, skipping",
219+
logPrefix,
220+
address.getHostString(),
221+
e);
222+
}
223+
// The unresolved node itself is never added: it is either replaced by its expansions
// above or, when the lookup failed, dropped from the result.
continue;
224+
}
225+
}
226+
// Already resolved, or not a DefaultEndPoint — use the original node as-is.
227+
result.add(node);
228+
}
229+
return result;
230+
}
231+
176232
/** Whether the default contact point was used (because none were provided explicitly). */
177233
public boolean wasImplicitContactPoint() {
178234
return wasImplicitContactPoint;

core/src/test/java/com/datastax/oss/driver/internal/core/metadata/LoadBalancingPolicyWrapperTest.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,8 @@ public void setup() {
100100
Objects.requireNonNull(node3.getHostId()), node3);
101101
when(metadataManager.getMetadata()).thenReturn(metadata);
102102
when(metadata.getNodes()).thenReturn(allNodes);
103-
when(metadataManager.getContactPoints()).thenReturn(contactPoints);
103+
when(metadataManager.getResolvedContactPoints())
104+
.thenReturn(ImmutableList.copyOf(contactPoints));
104105
when(context.getMetadataManager()).thenReturn(metadataManager);
105106

106107
when(context.getConfig()).thenReturn(config);
@@ -204,8 +205,7 @@ public void should_fetch_control_connection_query_plan_from_policy_after_init()
204205
assertThat(queryPlan.poll()).isEqualTo(node3);
205206
assertThat(queryPlan.poll()).isEqualTo(node2);
206207
assertThat(queryPlan.poll()).isEqualTo(node1);
207-
// Remaining nodes are contact points appended at the end.
208-
// They are new DefaultNode instances created via newContactPoint, so compare by endpoint.
208+
// Remaining nodes are the resolved contact points appended at the end.
209209
Set<EndPoint> remainingEndpoints = new java.util.HashSet<>();
210210
for (Node n : queryPlan) {
211211
remainingEndpoints.add(n.getEndPoint());

core/src/test/java/com/datastax/oss/driver/internal/core/metadata/MetadataManagerTest.java

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,66 @@ public void should_throw_on_registerNode_with_null_hostId() {
490490
.hasMessageContaining("Cannot register node without hostId");
491491
}
492492

493+
// Covers the null-state branch of getResolvedContactPoints(): no contact points added yet.
@Test
494+
public void should_return_empty_list_when_contact_points_not_yet_set() {
495+
// contactPoints field is null until addContactPoints is called
496+
assertThat(metadataManager.getResolvedContactPoints()).isEmpty();
497+
}
498+
499+
// Covers the pass-through branch: a contact point that needs no DNS expansion yields one node.
@Test
500+
public void should_return_already_resolved_contact_points_unchanged() {
501+
// Given — a contact point with an already-resolved InetSocketAddress
502+
metadataManager.addContactPoints(ImmutableSet.of(END_POINT2));
503+
504+
// When
505+
List<Node> resolved = metadataManager.getResolvedContactPoints();
506+
507+
// Then — the single node is returned as-is (no expansion needed)
// Only the endpoint is compared here; node identity is not asserted.
508+
assertThat(resolved).hasSize(1);
509+
assertThat(resolved.get(0).getEndPoint()).isEqualTo(END_POINT2);
510+
}
511+
512+
// Covers the expansion branch: an unresolved hostname is replaced by resolved addresses that
// keep the original port.
@Test
513+
public void should_expand_unresolved_hostname_to_all_ips() {
514+
// Given — a contact point with an unresolved hostname (localhost → 127.0.0.1)
// NOTE(review): the result depends on how the local machine resolves "localhost" (it may map
// to more than one address), which is why only "not empty" is asserted below.
515+
EndPoint unresolvedEndPoint =
516+
new DefaultEndPoint(InetSocketAddress.createUnresolved("localhost", 9042));
517+
metadataManager.addContactPoints(ImmutableSet.of(unresolvedEndPoint));
518+
519+
// When
520+
List<Node> resolved = metadataManager.getResolvedContactPoints();
521+
522+
// Then — at least one node is returned, each with a resolved address
523+
assertThat(resolved).isNotEmpty();
524+
for (Node node : resolved) {
525+
InetSocketAddress addr = (InetSocketAddress) node.getEndPoint().resolve();
526+
assertThat(addr.isUnresolved()).isFalse();
527+
assertThat(addr.getPort()).isEqualTo(9042);
528+
}
529+
}
530+
531+
// Covers a mixed input: the already-resolved contact point passes through while the unresolved
// one is expanded, and both end up in the same result list.
@Test
532+
public void should_expand_multiple_contact_points_independently() {
533+
// Given — two contact points: one already resolved, one unresolved
534+
EndPoint resolvedEndPoint = END_POINT3;
535+
EndPoint unresolvedEndPoint =
536+
new DefaultEndPoint(InetSocketAddress.createUnresolved("localhost", 9042));
537+
metadataManager.addContactPoints(ImmutableSet.of(resolvedEndPoint, unresolvedEndPoint));
538+
539+
// When
540+
List<Node> resolved = metadataManager.getResolvedContactPoints();
541+
542+
// Then — at least 2 nodes: 1 for the resolved + at least 1 for localhost expansion
// (the exact count depends on how many addresses "localhost" resolves to locally)
543+
assertThat(resolved.size()).isGreaterThanOrEqualTo(2);
544+
// The resolved endpoint must appear
545+
assertThat(resolved).anySatisfy(n -> assertThat(n.getEndPoint()).isEqualTo(resolvedEndPoint));
546+
// All returned addresses must be resolved
547+
for (Node node : resolved) {
548+
InetSocketAddress addr = (InetSocketAddress) node.getEndPoint().resolve();
549+
assertThat(addr.isUnresolved()).isFalse();
550+
}
551+
}
552+
493553
private static class TestMetadataManager extends MetadataManager {
494554

495555
private List<MetadataRefresh> refreshes = new CopyOnWriteArrayList<>();

integration-tests/src/test/java/com/datastax/oss/driver/core/resolver/MockResolverIT.java

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,46 @@ public void run_replace_test_20_times() {
201201
}
202202
}
203203

204+
/**
205+
* Verifies that the driver can connect to a cluster when the first DNS entry for the contact
206+
* point hostname resolves to a non-responsive IP address (DRIVER-201).
207+
*
208+
* <p>Contact points are always kept as unresolved hostnames and expanded to all their DNS IPs at
209+
* connection time. When the first IP is unreachable the driver tries the remaining ones, so the
210+
* session opens successfully even if the first DNS entry is non-responsive.
*
* <p>NOTE(review): the init-time query plan built by LoadBalancingPolicyWrapper is shuffled, so
* the non-responsive address is not necessarily the first one attempted on a given run; the test
* asserts that the session opens regardless of the order — confirm this is the intended coverage.
211+
*/
212+
@Test
213+
public void should_connect_when_first_dns_entry_is_non_responsive() {
214+
// Use a 2-node cluster on 127.0.1.x. Node 11 (127.0.1.11) does not exist and is therefore
215+
// non-responsive; nodes 1 and 2 are real.
216+
try (CcmBridge ccmBridge = CcmBridge.builder().withNodes(2).withIpPrefix("127.0.1.").build()) {
217+
// Start from a clean mapping for the fake hostname.
// NOTE(review): the entries added below are not removed at the end of this test — verify
// that later tests using "test.cluster.fake" reset them as well.
MultimapHostResolverProvider.removeResolverEntries("test.cluster.fake");
218+
// First entry intentionally points to a non-existent/non-responsive address.
219+
MultimapHostResolverProvider.addResolverEntry("test.cluster.fake", "127.0.1.11");
220+
MultimapHostResolverProvider.addResolverEntry(
221+
"test.cluster.fake", ccmBridge.getNodeIpAddress(1));
222+
MultimapHostResolverProvider.addResolverEntry(
223+
"test.cluster.fake", ccmBridge.getNodeIpAddress(2));
224+
ccmBridge.create();
225+
ccmBridge.start();
226+
227+
// The only contact point is the fake hostname, so every address the driver can try comes
// from the resolver entries registered above.
// Presumably RECONNECT_ON_INIT=false makes a failed initial connection fail the build
// instead of retrying — TODO confirm against the option's documentation.
DriverConfigLoader loader =
228+
new DefaultProgrammaticDriverConfigLoaderBuilder()
229+
.withBoolean(TypedDriverOption.RECONNECT_ON_INIT.getRawOption(), false)
230+
.withStringList(
231+
TypedDriverOption.CONTACT_POINTS.getRawOption(),
232+
Collections.singletonList("test.cluster.fake:9042"))
233+
.build();
234+
235+
// The session must open successfully despite the first DNS entry being unreachable.
236+
try (CqlSession session = new CqlSessionBuilder().withConfigLoader(loader).build()) {
237+
ResultSet rs = session.execute("select * from system.local where key='local'");
238+
assertThat(rs.one()).isNotNull();
239+
// Both real nodes are expected to be reported up; the fake address is not counted.
waitForAllNodesUp(session, 2);
240+
}
241+
}
242+
}
243+
204244
// This is too long to run during CI, but is useful for manual investigations.
205245
@SuppressWarnings("unused")
206246
public void cannot_reconnect_with_resolved_socket() {

0 commit comments

Comments
 (0)