Skip to content

Commit d3c7ebb

Browse files
committed
remove isRunningOnEMR
regex is specific enough to avoid collision
1 parent a8fdf19 commit d3c7ebb

4 files changed

Lines changed: 3 additions & 31 deletions

File tree

dd-java-agent/instrumentation/spark/spark-common/build.gradle

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ dependencies {
2828
testFixturesCompileOnly(libs.bundles.spock)
2929

3030
testImplementation project(':dd-java-agent:instrumentation-testing')
31-
testImplementation group: 'org.apache.spark', name: 'spark-core_2.12', version: '2.4.0'
3231
testImplementation group: 'org.apache.spark', name: 'spark-launcher_2.12', version: '2.4.0'
3332
}
3433

dd-java-agent/instrumentation/spark/spark-common/src/main/java/datadog/trace/instrumentation/spark/AbstractDatadogSparkListener.java

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,6 @@ public abstract class AbstractDatadogSparkListener extends SparkListener {
134134

135135
private volatile boolean isStreamingJob = false;
136136
private final boolean isRunningOnDatabricks;
137-
private final boolean isRunningOnEmr;
138137
private final String databricksClusterName;
139138
private final String databricksServiceName;
140139
private final String sparkServiceName;
@@ -157,7 +156,6 @@ public AbstractDatadogSparkListener(SparkConf sparkConf, String appId, String sp
157156
this.sparkVersion = sparkVersion;
158157

159158
isRunningOnDatabricks = sparkConf.contains("spark.databricks.sparkContextId");
160-
isRunningOnEmr = EmrUtils.isRunningOnEmr(sparkConf);
161159
databricksClusterName = sparkConf.get("spark.databricks.clusterUsageTags.clusterName", null);
162160
databricksServiceName = getDatabricksServiceName(sparkConf, databricksClusterName);
163161
sparkServiceName = getSparkServiceName(sparkConf, isRunningOnDatabricks);
@@ -278,7 +276,7 @@ private void initApplicationSpanIfNotInitialized() {
278276
}
279277

280278
captureApplicationParameters(builder);
281-
captureEmrStepId(builder, isRunningOnEmr);
279+
captureEmrStepId(builder);
282280

283281
Optional<OpenlineageParentContext> openlineageParentContext =
284282
OpenlineageParentContext.from(sparkConf);
@@ -1213,10 +1211,7 @@ private void captureApplicationParameters(AgentTracer.SpanBuilder builder) {
12131211
builder.withTag("config.spark_version", sparkVersion);
12141212
}
12151213

1216-
private static void captureEmrStepId(AgentTracer.SpanBuilder builder, boolean isRunningOnEmr) {
1217-
if (!isRunningOnEmr) {
1218-
return;
1219-
}
1214+
private static void captureEmrStepId(AgentTracer.SpanBuilder builder) {
12201215
String stepId = EmrUtils.getEmrStepId();
12211216
if (stepId != null) {
12221217
builder.withTag("emr_step_id", stepId);

dd-java-agent/instrumentation/spark/spark-common/src/main/java/datadog/trace/instrumentation/spark/EmrUtils.java

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,32 +2,20 @@
22

33
import java.nio.file.Path;
44
import java.nio.file.Paths;
5-
import java.util.Arrays;
65
import java.util.regex.Matcher;
76
import java.util.regex.Pattern;
87
import javax.annotation.Nullable;
9-
import org.apache.spark.SparkConf;
108
import org.slf4j.Logger;
119
import org.slf4j.LoggerFactory;
1210

13-
/** Utilities for detecting AWS EMR and extracting EMR-specific metadata. */
11+
/** Extracts the AWS EMR Step ID from the working directory name (e.g. s-07767992IY7VC5NVV854). */
1412
class EmrUtils {
1513

1614
private static final Logger log = LoggerFactory.getLogger(EmrUtils.class);
1715

1816
/** EMR step ID is the prefix {@code s-} followed by exactly 20 characters (digits and uppercase letters only). */
1917
private static final Pattern EMR_STEP_ID_PATTERN = Pattern.compile("^(s-[0-9A-Z]{20})$");
2018

21-
/**
22-
* Returns true if the Spark job is running on AWS EMR. Detection scans all SparkConf keys for the
23-
* {@code .emr.} substring, which is present in EMR-specific keys (e.g. {@code
24-
* spark.emr.default.executor.cores}, {@code spark.sql.emr.internal.extensions}) and absent from
25-
* all standard Spark configuration keys.
26-
*/
27-
static boolean isRunningOnEmr(SparkConf conf) {
28-
return Arrays.stream(conf.getAll()).anyMatch(t -> t._1().contains(".emr."));
29-
}
30-
3119
@Nullable
3220
static String getEmrStepId() {
3321
try {

dd-java-agent/instrumentation/spark/spark-common/src/test/java/datadog/trace/instrumentation/spark/EmrUtilsTest.java

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,7 @@
22

33
import static org.junit.jupiter.api.Assertions.assertEquals;
44
import static org.junit.jupiter.api.Assertions.assertNull;
5-
import static org.junit.jupiter.api.Assertions.assertTrue;
65

7-
import org.apache.spark.SparkConf;
86
import org.junit.jupiter.api.AfterEach;
97
import org.junit.jupiter.api.BeforeEach;
108
import org.junit.jupiter.api.Test;
@@ -40,12 +38,4 @@ void returnsNullForApplicationIdWorkdir() {
4038
System.setProperty("user.dir", "/home/hadoop/application_1234567890_0001");
4139
assertNull(EmrUtils.getEmrStepId());
4240
}
43-
44-
@Test
45-
void isRunningOnEmrReturnsTrueWhenConfContainsEmrKey() {
46-
SparkConf conf = new SparkConf();
47-
conf.set(
48-
"spark.sql.emr.internal.extensions", "com.amazonaws.emr.spark.EmrSparkSessionExtensions");
49-
assertTrue(EmrUtils.isRunningOnEmr(conf));
50-
}
5141
}

0 commit comments

Comments
 (0)