Skip to content

Commit c951532

Browse files
committed
pr comments
1 parent cde65eb commit c951532

3 files changed

Lines changed: 39 additions & 23 deletions

File tree

README.md

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,12 @@ You may want queries, asserts or query metadata to use a specific date column, o
183183

184184
Simply pass `--parameter KEY=VALUE` in the CLI and the `KEY` will be replaced with `VALUE` in all queries, query metadata and test assertions. For example, to query June 23rd 2015, you could use `--parameter DATE=2015-06-23`. If the query uses `${DATE}` in the query it will be replaced before execution with `2015-06-23`.
185185

186+
### Query Parallelism
187+
188+
You may want to run queries in parallel rather than sequentially especially if you have many time-consuming queries.
189+
190+
To enable this feature, pass in `--query-parallel-enable true` when launching Validatar. By default, this will run all queries in parallel. If this number needs to be limited, pass in `--query-parallel-max VALUE` where `VALUE` is the max number of queries that should run concurrently.
191+
186192
## Execution Engines
187193

188194
### Hive
@@ -330,6 +336,17 @@ Option Description
330336
fully qualified classes to plug in>
331337
332338
339+
Engine Options:
340+
Option Description
341+
------ -----------
342+
--query-parallel-enable <Boolean: Whether or not queries should run in
343+
Query parallelism option> parallel. (default: false)
344+
--query-parallel-max <Integer: Max The max number of queries that will
345+
query parallelism> run concurrently. If non-positive or
346+
unspecified, all queries will run at
347+
once. (default: 0)
348+
349+
333350
Hive engine options:
334351
Option (* = required) Description
335352
--------------------- -----------
@@ -488,10 +505,11 @@ Version | Notes
488505
0.5.4 | Added a flag ```--report-on-failure-only``` to only generate reports if there were failures in tests (including warnOnly) or queries
489506
0.5.5 | Shaded ```org.objectweb.asm``` to not clash with asm in Hadoop environments
490507
0.5.6 | Fixed a bug with pretty-printing results with nulls
491-
0.6.0 | Better reporting (show data with only the assertion columns with the assertion result column, columns now in sorted order, EMail Subject Prefix). Can now write multiple reports per invocation (specify more than one report formatter using --report-format)
508+
0.6.0 | Better reporting (show data with only the assertion columns with the assertion result column, columns now in sorted order, Email Subject Prefix). Can now write multiple reports per invocation (specify more than one report formatter using --report-format)
492509
0.6.1 | Added a flag to configure the Email reporter SMTP strategy. Use ```--email-smtp-strategy``` to pass in ```SMTP_PLAIN```, ```SMTP_TLS``` or ```SMTP_SSL```.
493510
0.6.2 | Bintray EOL. First rerelease of 0.6.1 on Maven Central instead
494511
0.6.3 | Screwdriver migration. First rerelease of 0.6.1 using Screwdriver instead of Travis.
512+
0.6.4 | Added support for running queries in parallel
495513

496514
## Members
497515

src/main/java/com/yahoo/validatar/execution/EngineManager.java

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -30,31 +30,30 @@
3030
public class EngineManager extends Pluggable<Engine> implements Helpable {
3131
public static final String CUSTOM_ENGINE = "custom-engine";
3232
public static final String CUSTOM_ENGINE_DESCRIPTION = "Additional custom engine to load.";
33+
public static final String QUERY_PARALLEL_ENABLE = "query-parallel-enable";
34+
public static final String QUERY_PARALLEL_MAX = "query-parallel-max";
35+
private static final int QUERY_PARALLEL_MIN = 1;
3336

34-
35-
public static final String PARALLEL = "parallel";
36-
public static final String THREAD_POOL_SIZE = "thread-pool-size";
37-
38-
protected boolean parallel;
39-
protected int threadPoolSize;
37+
protected boolean queryParallelEnable;
38+
protected int queryParallelMax;
4039

4140
private static final OptionParser PARSER = new OptionParser() {
4241
{
43-
accepts(PARALLEL, "Whether or not queries should run in parallel.")
42+
accepts(QUERY_PARALLEL_ENABLE, "Whether or not queries should run in parallel.")
4443
.withRequiredArg()
45-
.describedAs("Parallel option")
44+
.describedAs("Query parallelism option")
4645
.ofType(Boolean.class)
4746
.defaultsTo(false);
48-
accepts(THREAD_POOL_SIZE, "The number of queries that can run in parallel.")
47+
accepts(QUERY_PARALLEL_MAX, "The max number of queries that will run concurrently. If non-positive or " +
48+
"unspecified, all queries will run at once.")
4949
.withRequiredArg()
50-
.describedAs("Thread pool size")
50+
.describedAs("Max query parallelism")
5151
.ofType(Integer.class)
5252
.defaultsTo(0);
5353
allowsUnrecognizedOptions();
5454
}
5555
};
5656

57-
5857
/**
5958
* The Engine classes to manage.
6059
*/
@@ -114,8 +113,8 @@ public EngineManager(String[] arguments) {
114113
}
115114

116115
OptionSet parser = PARSER.parse(arguments);
117-
parallel = (Boolean) parser.valueOf(PARALLEL);
118-
threadPoolSize = (Integer) parser.valueOf(THREAD_POOL_SIZE);
116+
queryParallelEnable = (Boolean) parser.valueOf(QUERY_PARALLEL_ENABLE);
117+
queryParallelMax = (Integer) parser.valueOf(QUERY_PARALLEL_MAX);
119118
}
120119

121120
/**
@@ -138,8 +137,7 @@ protected boolean startEngines(List<Query> queries) {
138137
List<Query> all = queries == null ? Collections.emptyList() : queries;
139138
// Queries -> engine name Set -> start engine -> verify all started
140139
return all.stream().map(q -> q.engine).collect(Collectors.toSet())
141-
.stream().map(this::startEngine)
142-
.allMatch(b -> b);
140+
.stream().allMatch(this::startEngine);
143141
}
144142

145143
private boolean startEngine(String engine) {
@@ -163,6 +161,7 @@ private boolean startEngine(String engine) {
163161

164162
@Override
165163
public void printHelp() {
164+
Helpable.printHelp("Engine Options", PARSER);
166165
engines.values().stream().map(WorkingEngine::getEngine).forEach(Engine::printHelp);
167166
Helpable.printHelp("Advanced Engine Options", getPluginOptionsParser());
168167
}
@@ -185,12 +184,11 @@ public boolean run(List<Query> queries) {
185184
if (!startEngines(queries)) {
186185
return false;
187186
}
188-
189187
// Run each query.
190-
if (!parallel) {
188+
if (!queryParallelEnable) {
191189
queries.forEach(this::run);
192190
} else {
193-
int poolSize = threadPoolSize > 0 ? threadPoolSize : queries.size();
191+
int poolSize = Math.max(queryParallelMax > 0 ? queryParallelMax : queries.size(), QUERY_PARALLEL_MIN);
194192
log.info("Creating a ForkJoinPool with size {}", poolSize);
195193
ForkJoinPool forkJoinPool = new ForkJoinPool(poolSize);
196194
try {

src/test/java/com/yahoo/validatar/execution/EngineManagerTest.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ public void testParallelRun() {
261261
query.engine = MockRunningEngine.ENGINE_NAME;
262262
query.name = "Foo";
263263

264-
String[] args = {"--parallel", "true"};
264+
String[] args = {"--query-parallel-enable", "true"};
265265
manager = new EngineManager(args);
266266
manager.setEngines(engines);
267267

@@ -291,9 +291,9 @@ public void testParallelRun() {
291291

292292
@Test
293293
public void testConstructor() {
294-
String[] args = {"--parallel", "true", "--thread-pool-size", "10"};
294+
String[] args = {"--query-parallel-enable", "true", "--query-parallel-max", "10"};
295295
manager = new EngineManager(args);
296-
Assert.assertTrue(manager.parallel);
297-
Assert.assertEquals(manager.threadPoolSize, 10);
296+
Assert.assertTrue(manager.queryParallelEnable);
297+
Assert.assertEquals(manager.queryParallelMax, 10);
298298
}
299299
}

0 commit comments

Comments
 (0)