1717 */
1818package org .apache .hadoop .hive .ql ;
1919
20+ import java .io .File ;
21+ import java .io .PrintWriter ;
22+ import java .nio .charset .StandardCharsets ;
23+ import java .nio .file .Files ;
24+ import java .util .Arrays ;
25+ import java .util .List ;
26+
2027import org .apache .hadoop .hive .ql .QTestMiniClusters .FsType ;
2128import org .apache .hadoop .hive .ql .qoption .QTestReplaceHandler ;
2229import org .junit .Assert ;
30+ import org .junit .Rule ;
2331import org .junit .Test ;
32+ import org .junit .rules .TemporaryFolder ;
2433
/**
 * This class contains unit tests for QOutProcessor.
 */
2837public class TestQOutProcessor {
2938 QOutProcessor qOutProcessor = new QOutProcessor (FsType .LOCAL , new QTestReplaceHandler ());
3039
40+ @ Rule
41+ public TemporaryFolder tmpFolder = new TemporaryFolder ();
42+
43+ /**
44+ * A raw vertex-killed log line must be replaced with MASKED_VERTEX_KILLED_PATTERN.
45+ */
46+ @ Test
47+ public void testVertexKilledLineIsReplaced () {
48+ String raw = "Vertex killed, vertexName=Map 2, "
49+ + "diagnostics=[Task failed, taskAttemptId=attempt_1 "
50+ + "[Map 2] killed/failed due to:OTHER_VERTEX_FAILURE]" ;
51+ Assert .assertEquals (QOutProcessor .MASKED_VERTEX_KILLED_PATTERN , processLine (raw ));
52+ }
53+
54+ /**
55+ * A line containing multiple embedded MASKED_VERTEX_KILLED_PATTERN tokens
56+ * (produced after the first regex pass) must be collapsed to a single token.
57+ */
58+ @ Test
59+ public void testMultipleEmbeddedVertexKilledTokensCollapsedOnSameLine () {
60+ String twoTokens = QOutProcessor .MASKED_VERTEX_KILLED_PATTERN
61+ + QOutProcessor .MASKED_VERTEX_KILLED_PATTERN ;
62+ Assert .assertEquals (QOutProcessor .MASKED_VERTEX_KILLED_PATTERN , processLine (twoTokens ));
63+
64+ String threeTokens = QOutProcessor .MASKED_VERTEX_KILLED_PATTERN
65+ + QOutProcessor .MASKED_VERTEX_KILLED_PATTERN
66+ + QOutProcessor .MASKED_VERTEX_KILLED_PATTERN ;
67+ Assert .assertEquals (QOutProcessor .MASKED_VERTEX_KILLED_PATTERN , processLine (threeTokens ));
68+ }
69+
70+ /**
71+ * A single MASKED_VERTEX_KILLED_PATTERN token must be left unchanged by processLine.
72+ */
73+ @ Test
74+ public void testSingleEmbeddedVertexKilledTokenUnchanged () {
75+ Assert .assertEquals (
76+ QOutProcessor .MASKED_VERTEX_KILLED_PATTERN ,
77+ processLine (QOutProcessor .MASKED_VERTEX_KILLED_PATTERN ));
78+ }
79+
80+ /**
81+ * killedVertices:<number> must be masked regardless of the numeric value.
82+ */
83+ @ Test
84+ public void testKilledVerticesCountIsMasked () {
85+ Assert .assertEquals ("killedVertices:#Masked#" , processLine ("killedVertices:3" ));
86+ Assert .assertEquals ("killedVertices:#Masked#" , processLine ("killedVertices:0" ));
87+ Assert .assertEquals ("killedVertices:#Masked#" , processLine ("killedVertices:100" ));
88+ }
89+
90+ /**
91+ * killedVertices masking should work when embedded in a longer line (e.g. FAILED: summary).
92+ */
93+ @ Test
94+ public void testKilledVerticesCountIsMaskedInLongerLine () {
95+ String input = "FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.tez."
96+ + "TezTask. killedVertices:2 Vertex re-run not supported in current execution mode." ;
97+ String output = processLine (input );
98+ Assert .assertTrue ("killedVertices:#Masked# must appear in output" ,
99+ output .contains ("killedVertices:#Masked#" ));
100+ Assert .assertFalse ("raw killedVertices:2 must not appear in output" ,
101+ output .contains ("killedVertices:2" ));
102+ }
103+
104+ /**
105+ * Multiple consecutive standalone MASKED_VERTEX_KILLED_PATTERN lines must be
106+ * collapsed to a single line by maskPatterns().
107+ */
108+ @ Test
109+ public void testConsecutiveVertexKilledLinesDeduplicatedInFile () throws Exception {
110+ File f = tmpFile (
111+ "line before" ,
112+ QOutProcessor .MASKED_VERTEX_KILLED_PATTERN ,
113+ QOutProcessor .MASKED_VERTEX_KILLED_PATTERN ,
114+ QOutProcessor .MASKED_VERTEX_KILLED_PATTERN ,
115+ "line after" );
116+
117+ qOutProcessor .maskPatterns (f .getAbsolutePath ());
118+
119+ List <String > lines = readLines (f );
120+ Assert .assertEquals (
121+ Arrays .asList ("line before" , QOutProcessor .MASKED_VERTEX_KILLED_PATTERN , "line after" ),
122+ lines );
123+ }
124+
125+ /**
126+ * Two separate (non-consecutive) vertex-killed blocks must each produce one line.
127+ */
128+ @ Test
129+ public void testNonConsecutiveVertexKilledLinesKeptSeparately () throws Exception {
130+ File f = tmpFile (
131+ QOutProcessor .MASKED_VERTEX_KILLED_PATTERN ,
132+ QOutProcessor .MASKED_VERTEX_KILLED_PATTERN ,
133+ "some other line" ,
134+ QOutProcessor .MASKED_VERTEX_KILLED_PATTERN ,
135+ QOutProcessor .MASKED_VERTEX_KILLED_PATTERN );
136+
137+ qOutProcessor .maskPatterns (f .getAbsolutePath ());
138+
139+ List <String > lines = readLines (f );
140+ Assert .assertEquals (
141+ Arrays .asList (
142+ QOutProcessor .MASKED_VERTEX_KILLED_PATTERN ,
143+ "some other line" ,
144+ QOutProcessor .MASKED_VERTEX_KILLED_PATTERN ),
145+ lines );
146+ }
147+
148+ /**
149+ * Vertex-killed deduplication must reset when a normal (masked) line interrupts
150+ * the run of vertex-killed lines.
151+ */
152+ @ Test
153+ public void testVertexKilledRunResetByMaskedLine () throws Exception {
154+ // "Deleted something" starts with "Deleted" → gets replaced by MASK_PATTERN
155+ File f = tmpFile (
156+ QOutProcessor .MASKED_VERTEX_KILLED_PATTERN ,
157+ "Deleted /tmp/something" , // will be masked → MASK_PATTERN
158+ QOutProcessor .MASKED_VERTEX_KILLED_PATTERN );
159+
160+ qOutProcessor .maskPatterns (f .getAbsolutePath ());
161+
162+ List <String > lines = readLines (f );
163+ // MASK_PATTERN lines fold duplicates; but here there is only one occurrence
164+ Assert .assertEquals (
165+ Arrays .asList (
166+ QOutProcessor .MASKED_VERTEX_KILLED_PATTERN ,
167+ QOutProcessor .MASK_PATTERN ,
168+ QOutProcessor .MASKED_VERTEX_KILLED_PATTERN ),
169+ lines );
170+ }
171+
31172 @ Test
32173 public void testSelectiveHdfsPatternMaskOnlyHdfsPath () {
33174 Assert .assertEquals ("nothing to be masked" , processLine ("nothing to be masked" ));
@@ -77,4 +218,23 @@ public void testSelectiveHdfsPatternMaskOnlyHdfsPath() {
77218 private String processLine (String line ) {
78219 return qOutProcessor .processLine (line ).get ();
79220 }
221+
222+ private File tmpFile (String ... lines ) throws Exception {
223+ File f = tmpFolder .newFile ();
224+ try (PrintWriter pw = new PrintWriter (f , "UTF-8" )) {
225+ for (String l : lines ) {
226+ pw .println (l );
227+ }
228+ }
229+ return f ;
230+ }
231+
232+ private List <String > readLines (File f ) throws Exception {
233+ List <String > all = Files .readAllLines (f .toPath (), StandardCharsets .UTF_8 );
234+ while (!all .isEmpty () && all .get (all .size () - 1 ).isEmpty ()) {
235+ all .remove (all .size () - 1 );
236+ }
237+ return all ;
238+ }
239+
80240}
0 commit comments