Skip to content

Commit 0fa65cb

Browse files
HIVE-29656: q.out regex fix
1 parent 4c4a618 commit 0fa65cb

3 files changed

Lines changed: 197 additions & 2 deletions

File tree

itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ public class QOutProcessor {
5858

5959
public static final String MASK_PATTERN = "#### A masked pattern was here ####";
6060
public static final String PARTIAL_MASK_PATTERN = "#### A PARTIAL masked pattern was here ####";
61+
public static final String MASKED_VERTEX_KILLED_PATTERN = "[Masked Vertex killed due to OTHER_VERTEX_FAILURE]";
6162
private static final PatternReplacementPair MASK_STATS = new PatternReplacementPair(
6263
Pattern.compile(" Num rows: [1-9][0-9]* Data size: [1-9][0-9]*"),
6364
" Num rows: ###Masked### Data size: ###Masked###");
@@ -197,6 +198,7 @@ public void maskPatterns(String fname) throws Exception {
197198
out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8"));
198199

199200
boolean lastWasMasked = false;
201+
boolean lastWasVertexKilled = false;
200202

201203
while (null != (line = in.readLine())) {
202204
LineProcessingResult result = processLine(line);
@@ -209,10 +211,22 @@ public void maskPatterns(String fname) throws Exception {
209211
lastWasMasked = true;
210212
result.partialMaskWasMatched = false;
211213
}
214+
lastWasVertexKilled = false;
215+
} else if (result.line.equals(MASKED_VERTEX_KILLED_PATTERN)) {
216+
// Deduplicate consecutive standalone vertex-killed lines — the number of sibling
217+
// vertices still alive when the kill propagates is non-deterministic.
218+
if (!lastWasVertexKilled) {
219+
out.write(result.line);
220+
out.write("\n");
221+
lastWasVertexKilled = true;
222+
}
223+
lastWasMasked = false;
224+
result.partialMaskWasMatched = false;
212225
} else {
213226
out.write(result.line);
214227
out.write("\n");
215228
lastWasMasked = false;
229+
lastWasVertexKilled = false;
216230
result.partialMaskWasMatched = false;
217231
}
218232
}
@@ -350,7 +364,16 @@ private final static class PatternReplacementPair {
350364
// We do not want the test to fail because of this.
351365
ppm.add(new PatternReplacementPair(
352366
Pattern.compile("Vertex killed, vertexName=(.*?),.*\\[\\1\\] killed\\/failed due to:OTHER_VERTEX_FAILURE\\]"),
353-
"[Masked Vertex killed due to OTHER_VERTEX_FAILURE]"));
367+
MASKED_VERTEX_KILLED_PATTERN));
368+
369+
// Collapse multiple consecutive embedded [Masked Vertex killed] tokens on the same line
370+
// (the long FAILED: summary line repeats one token per killed vertex).
371+
ppm.add(new PatternReplacementPair(Pattern.compile("(\\Q" + MASKED_VERTEX_KILLED_PATTERN + "\\E){2,}"),
372+
MASKED_VERTEX_KILLED_PATTERN));
373+
374+
// The number of vertices killed when a DAG fails is a scheduling race condition —
375+
// depends on how many sibling vertices are still running at the moment the kill propagates.
376+
ppm.add(new PatternReplacementPair(Pattern.compile("killedVertices:[0-9]+"), "killedVertices:#Masked#"));
354377

355378
partialPlanMask = ppm.toArray(new PatternReplacementPair[ppm.size()]);
356379
}

itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1020,7 +1020,19 @@ public QTestProcessExecResult checkCliDriverResults() throws Exception {
10201020
qTestResultProcessor.overwriteResults(f.getPath(), outFileName);
10211021
return QTestProcessExecResult.createWithoutOutput(0);
10221022
} else {
1023-
return qTestResultProcessor.executeDiffCommand(f.getPath(), outFileName, false);
1023+
// Apply the same masking pipeline to a temporary copy of the reference file so that
1024+
// non-deterministic values are normalized on both sides.
1025+
// This preserves backward compatibility with existing .q.out files that were written
1026+
// before the masking rules were introduced.
1027+
File maskedRef = new File(outFileName + ".masked_ref");
1028+
try {
1029+
FileUtils.copyFile(new File(outFileName), maskedRef);
1030+
qOutProcessor.maskPatterns(maskedRef.getPath());
1031+
return qTestResultProcessor.executeDiffCommand(f.getPath(), maskedRef.getPath(), false);
1032+
} finally {
1033+
maskedRef.delete();
1034+
new File(maskedRef.getPath() + ".orig").delete();
1035+
}
10241036
}
10251037
}
10261038

itests/util/src/test/java/org/apache/hadoop/hive/ql/TestQOutProcessor.java

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,158 @@
1717
*/
1818
package org.apache.hadoop.hive.ql;
1919

20+
import java.io.File;
21+
import java.io.PrintWriter;
22+
import java.nio.charset.StandardCharsets;
23+
import java.nio.file.Files;
24+
import java.util.Arrays;
25+
import java.util.List;
26+
2027
import org.apache.hadoop.hive.ql.QTestMiniClusters.FsType;
2128
import org.apache.hadoop.hive.ql.qoption.QTestReplaceHandler;
2229
import org.junit.Assert;
30+
import org.junit.Rule;
2331
import org.junit.Test;
32+
import org.junit.rules.TemporaryFolder;
2433

2534
/**
2635
* This class contains unit tests for QOutProcessor
2736
*/
2837
public class TestQOutProcessor {
2938
QOutProcessor qOutProcessor = new QOutProcessor(FsType.LOCAL, new QTestReplaceHandler());
3039

40+
@Rule
41+
public TemporaryFolder tmpFolder = new TemporaryFolder();
42+
43+
/**
44+
* A raw vertex-killed log line must be replaced with MASKED_VERTEX_KILLED_PATTERN.
45+
*/
46+
@Test
47+
public void testVertexKilledLineIsReplaced() {
48+
String raw = "Vertex killed, vertexName=Map 2, "
49+
+ "diagnostics=[Task failed, taskAttemptId=attempt_1 "
50+
+ "[Map 2] killed/failed due to:OTHER_VERTEX_FAILURE]";
51+
Assert.assertEquals(QOutProcessor.MASKED_VERTEX_KILLED_PATTERN, processLine(raw));
52+
}
53+
54+
/**
55+
* A line containing multiple embedded MASKED_VERTEX_KILLED_PATTERN tokens
56+
* (produced after the first regex pass) must be collapsed to a single token.
57+
*/
58+
@Test
59+
public void testMultipleEmbeddedVertexKilledTokensCollapsedOnSameLine() {
60+
String twoTokens = QOutProcessor.MASKED_VERTEX_KILLED_PATTERN
61+
+ QOutProcessor.MASKED_VERTEX_KILLED_PATTERN;
62+
Assert.assertEquals(QOutProcessor.MASKED_VERTEX_KILLED_PATTERN, processLine(twoTokens));
63+
64+
String threeTokens = QOutProcessor.MASKED_VERTEX_KILLED_PATTERN
65+
+ QOutProcessor.MASKED_VERTEX_KILLED_PATTERN
66+
+ QOutProcessor.MASKED_VERTEX_KILLED_PATTERN;
67+
Assert.assertEquals(QOutProcessor.MASKED_VERTEX_KILLED_PATTERN, processLine(threeTokens));
68+
}
69+
70+
/**
71+
* A single MASKED_VERTEX_KILLED_PATTERN token must be left unchanged by processLine.
72+
*/
73+
@Test
74+
public void testSingleEmbeddedVertexKilledTokenUnchanged() {
75+
Assert.assertEquals(
76+
QOutProcessor.MASKED_VERTEX_KILLED_PATTERN,
77+
processLine(QOutProcessor.MASKED_VERTEX_KILLED_PATTERN));
78+
}
79+
80+
/**
81+
* killedVertices:<number> must be masked regardless of the numeric value.
82+
*/
83+
@Test
84+
public void testKilledVerticesCountIsMasked() {
85+
Assert.assertEquals("killedVertices:#Masked#", processLine("killedVertices:3"));
86+
Assert.assertEquals("killedVertices:#Masked#", processLine("killedVertices:0"));
87+
Assert.assertEquals("killedVertices:#Masked#", processLine("killedVertices:100"));
88+
}
89+
90+
/**
91+
* killedVertices masking should work when embedded in a longer line (e.g. FAILED: summary).
92+
*/
93+
@Test
94+
public void testKilledVerticesCountIsMaskedInLongerLine() {
95+
String input = "FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.tez."
96+
+ "TezTask. killedVertices:2 Vertex re-run not supported in current execution mode.";
97+
String output = processLine(input);
98+
Assert.assertTrue("killedVertices:#Masked# must appear in output",
99+
output.contains("killedVertices:#Masked#"));
100+
Assert.assertFalse("raw killedVertices:2 must not appear in output",
101+
output.contains("killedVertices:2"));
102+
}
103+
104+
/**
105+
* Multiple consecutive standalone MASKED_VERTEX_KILLED_PATTERN lines must be
106+
* collapsed to a single line by maskPatterns().
107+
*/
108+
@Test
109+
public void testConsecutiveVertexKilledLinesDeduplicatedInFile() throws Exception {
110+
File f = tmpFile(
111+
"line before",
112+
QOutProcessor.MASKED_VERTEX_KILLED_PATTERN,
113+
QOutProcessor.MASKED_VERTEX_KILLED_PATTERN,
114+
QOutProcessor.MASKED_VERTEX_KILLED_PATTERN,
115+
"line after");
116+
117+
qOutProcessor.maskPatterns(f.getAbsolutePath());
118+
119+
List<String> lines = readLines(f);
120+
Assert.assertEquals(
121+
Arrays.asList("line before", QOutProcessor.MASKED_VERTEX_KILLED_PATTERN, "line after"),
122+
lines);
123+
}
124+
125+
/**
126+
* Two separate (non-consecutive) vertex-killed blocks must each produce one line.
127+
*/
128+
@Test
129+
public void testNonConsecutiveVertexKilledLinesKeptSeparately() throws Exception {
130+
File f = tmpFile(
131+
QOutProcessor.MASKED_VERTEX_KILLED_PATTERN,
132+
QOutProcessor.MASKED_VERTEX_KILLED_PATTERN,
133+
"some other line",
134+
QOutProcessor.MASKED_VERTEX_KILLED_PATTERN,
135+
QOutProcessor.MASKED_VERTEX_KILLED_PATTERN);
136+
137+
qOutProcessor.maskPatterns(f.getAbsolutePath());
138+
139+
List<String> lines = readLines(f);
140+
Assert.assertEquals(
141+
Arrays.asList(
142+
QOutProcessor.MASKED_VERTEX_KILLED_PATTERN,
143+
"some other line",
144+
QOutProcessor.MASKED_VERTEX_KILLED_PATTERN),
145+
lines);
146+
}
147+
148+
/**
149+
* Vertex-killed deduplication must reset when a normal (masked) line interrupts
150+
* the run of vertex-killed lines.
151+
*/
152+
@Test
153+
public void testVertexKilledRunResetByMaskedLine() throws Exception {
154+
// "Deleted something" starts with "Deleted" → gets replaced by MASK_PATTERN
155+
File f = tmpFile(
156+
QOutProcessor.MASKED_VERTEX_KILLED_PATTERN,
157+
"Deleted /tmp/something", // will be masked → MASK_PATTERN
158+
QOutProcessor.MASKED_VERTEX_KILLED_PATTERN);
159+
160+
qOutProcessor.maskPatterns(f.getAbsolutePath());
161+
162+
List<String> lines = readLines(f);
163+
// MASK_PATTERN lines fold duplicates; but here there is only one occurrence
164+
Assert.assertEquals(
165+
Arrays.asList(
166+
QOutProcessor.MASKED_VERTEX_KILLED_PATTERN,
167+
QOutProcessor.MASK_PATTERN,
168+
QOutProcessor.MASKED_VERTEX_KILLED_PATTERN),
169+
lines);
170+
}
171+
31172
@Test
32173
public void testSelectiveHdfsPatternMaskOnlyHdfsPath() {
33174
Assert.assertEquals("nothing to be masked", processLine("nothing to be masked"));
@@ -77,4 +218,23 @@ public void testSelectiveHdfsPatternMaskOnlyHdfsPath() {
77218
private String processLine(String line) {
78219
return qOutProcessor.processLine(line).get();
79220
}
221+
222+
private File tmpFile(String... lines) throws Exception {
223+
File f = tmpFolder.newFile();
224+
try (PrintWriter pw = new PrintWriter(f, "UTF-8")) {
225+
for (String l : lines) {
226+
pw.println(l);
227+
}
228+
}
229+
return f;
230+
}
231+
232+
private List<String> readLines(File f) throws Exception {
233+
List<String> all = Files.readAllLines(f.toPath(), StandardCharsets.UTF_8);
234+
while (!all.isEmpty() && all.get(all.size() - 1).isEmpty()) {
235+
all.remove(all.size() - 1);
236+
}
237+
return all;
238+
}
239+
80240
}

0 commit comments

Comments
 (0)