diff --git a/src/doc/examples/avoiding-executor-starvation.md b/src/doc/examples/avoiding-executor-starvation.md new file mode 100644 index 000000000..65a5fdcd6 --- /dev/null +++ b/src/doc/examples/avoiding-executor-starvation.md @@ -0,0 +1,141 @@ +# Avoiding Executor Starvation + +When a pipeline uses `agent any` (or a specific label) at the top level together +with `options { lock(...) }`, every build allocates an executor **before** +trying to acquire the lock. If the resource is busy the build sits on the +executor doing nothing — preventing other jobs from using it. This is called +**executor starvation**. + +## The problem + +```groovy +pipeline { + agent { + label 'some-label' + } + options { + lock('end-to-end-test-resource') + } + stages { + stage('Test') { + steps { + echo 'Running end-to-end tests...' + } + } + } +} +``` + +With this layout, if three builds of the same job are queued and there is only +one `end-to-end-test-resource`, all three grab an executor yet only one can +proceed. The other two hold their executors hostage while they wait for the +lock, blocking any other job that needs to run on those executors. + +## The solution — `agent none` + stage-level agent + +Move the `agent` directive **inside the stage** that needs the lock: + +```groovy +pipeline { + agent none // no executor allocated up front + stages { + stage('Test') { + options { + lock('end-to-end-test-resource') // acquire lock first … + } + agent { + label 'some-label' // … then allocate an executor + } + steps { + echo 'Running end-to-end tests...' + } + } + } +} +``` + +Now a build waiting for the lock does **not** occupy an executor. Once the lock +is acquired the stage allocates the agent and runs. Other jobs can use the +executors in the meantime. + +## Preparation stages that don't need the lock + +If your build has work that can run without the resource, split it into a +separate stage so the lock is held only when necessary: + +```groovy +pipeline { + agent none + stages { + stage('Build') { + agent any + steps { + echo 'Compiling — no lock needed' + } + } + stage('Deploy') { + options { + lock resource: 'deploy-target' + } + agent any + steps { + echo 'Deploying — lock held' + } + } + } +} +``` + +## Multiple stages under one lock + +If several stages need the same resource, wrap them in a parent stage: + +```groovy +pipeline { + agent none + stages { + stage('Deploy and Test') { + options { + lock resource: 'test-environment' + } + agent any + stages { + stage('Deploy') { + steps { + echo 'Deploying...' + } + } + stage('Integration Test') { + steps { + echo 'Testing...' + } + } + } + } + } +} +``` + +The lock is acquired once for the parent stage and released after the last +nested stage completes. No executor is consumed while waiting. + +## Scripted pipeline equivalent + +In scripted pipelines the same principle applies — acquire the lock **before** +allocating a node: + +```groovy +lock('end-to-end-test-resource') { + node('some-label') { + echo 'Running with lock and node' + } +} +``` + +See also [Node dependent resources](lock-nodes.md) for more scripted examples. + +## See also + +- [Lock specific stages](lock-specific-stages.md) +- [Locking multiple stages in declarative pipeline](locking-multiple-stages-in-declarative-pipeline.md) +- [Node dependent resources](lock-nodes.md) diff --git a/src/doc/examples/readme.md b/src/doc/examples/readme.md index 7a9136f34..03d6f60b1 100644 --- a/src/doc/examples/readme.md +++ b/src/doc/examples/readme.md @@ -5,6 +5,7 @@ Examples of lockable resources include: If you have an example to share, please create a [new documentation issue](https://github.com/jenkinsci/lockable-resources-plugin/issues/new?assignees=&labels=documentation&template=3-documentation.yml) and provide additional examples as a [pull request](https://github.com/jenkinsci/lockable-resources-plugin/pulls) to the repository. If you have a question, please open a [GitHub issue](https://github.com/jenkinsci/lockable-resources-plugin/issues/new/choose) with your question. +- [Avoiding executor starvation](avoiding-executor-starvation.md) - [Node depended resources](lock-nodes.md) - [Lock specific stages](lock-specific-stages.md) - [Locking multiple stages in declarative pipeline](locking-multiple-stages-in-declarative-pipeline.md) diff --git a/src/main/java/org/jenkins/plugins/lockableresources/nodes/NodesMirror.java b/src/main/java/org/jenkins/plugins/lockableresources/nodes/NodesMirror.java index d6d51906b..a768fc057 100644 --- a/src/main/java/org/jenkins/plugins/lockableresources/nodes/NodesMirror.java +++ b/src/main/java/org/jenkins/plugins/lockableresources/nodes/NodesMirror.java @@ -1,4 +1,4 @@ -package org.jenkins.plugins.lockableresources; +package org.jenkins.plugins.lockableresources.nodes; import hudson.Extension; import hudson.init.InitMilestone; @@ -10,6 +10,8 @@ import java.util.stream.Collectors; import jenkins.model.Jenkins; import jenkins.util.SystemProperties; +import org.jenkins.plugins.lockableresources.LockableResource; +import org.jenkins.plugins.lockableresources.LockableResourcesManager; import org.jenkins.plugins.lockableresources.util.Constants; // ----------------------------------------------------------------------------- diff --git a/src/test/java/org/jenkins/plugins/lockableresources/ConcurrentModificationExceptionTest.java b/src/test/java/org/jenkins/plugins/lockableresources/ConcurrentModificationExceptionTest.java index 4e135231b..741d57220 100644 --- a/src/test/java/org/jenkins/plugins/lockableresources/ConcurrentModificationExceptionTest.java +++ b/src/test/java/org/jenkins/plugins/lockableresources/ConcurrentModificationExceptionTest.java @@ -91,7 +91,7 @@ public void run() { public void run() { System.setProperty(Constants.SYSTEM_PROPERTY_ENABLE_NODE_MIRROR, "true"); LOGGER.info("run NodesMirror"); - org.jenkins.plugins.lockableresources.NodesMirror.createNodeResources(); + org.jenkins.plugins.lockableresources.nodes.NodesMirror.createNodeResources(); LOGGER.info("NodesMirror done"); } }; diff --git a/src/test/java/org/jenkins/plugins/lockableresources/DeclarativePipelineExecutorStarvationTest.java b/src/test/java/org/jenkins/plugins/lockableresources/DeclarativePipelineExecutorStarvationTest.java new file mode 100644 index 000000000..b15ca7cae --- /dev/null +++ b/src/test/java/org/jenkins/plugins/lockableresources/DeclarativePipelineExecutorStarvationTest.java @@ -0,0 +1,318 @@ +package org.jenkins.plugins.lockableresources; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.google.common.base.Joiner; +import hudson.model.Executor; +import hudson.model.Result; +import org.jenkins.plugins.lockableresources.util.Constants; +import org.jenkinsci.plugins.workflow.cps.CpsFlowDefinition; +import org.jenkinsci.plugins.workflow.job.WorkflowJob; +import org.jenkinsci.plugins.workflow.job.WorkflowRun; +import org.jenkinsci.plugins.workflow.test.steps.SemaphoreStep; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.jvnet.hudson.test.Issue; +import org.jvnet.hudson.test.JenkinsRule; +import org.jvnet.hudson.test.junit.jupiter.WithJenkins; + +/** + * Tests demonstrating how to avoid executor starvation when using lockable + * resources in declarative pipelines. + * + *
When a pipeline uses {@code agent any} (or a specific label) at the + * top level together with {@code options { lock(...) }}, the build allocates + * an executor before acquiring the lock. If the resource is busy the + * build sits on the executor doing nothing — blocking other jobs from using + * it. + * + *
The recommended pattern is to use {@code agent none} at the pipeline + * level and move the {@code agent} directive into individual stages, alongside + * the {@code lock} option. This way no executor is consumed while waiting for + * the lock. + * + * @see #697 + */ +@WithJenkins +class DeclarativePipelineExecutorStarvationTest { + + @BeforeEach + void setUp() { + System.setProperty(Constants.SYSTEM_PROPERTY_DISABLE_SAVE, "true"); + } + + // ----------------------------------------------------------------------- + // Best practice: agent none + stage-level agent + lock + // ----------------------------------------------------------------------- + + /** + * With {@code agent none} at the pipeline level and the lock acquired at + * stage level, no executor is held while waiting for the resource. A second + * job that does not need the lock can run immediately. + */ + @Test + @Issue("JENKINS-67083") + void agentNoneWithStageLevelLockDoesNotConsumeExecutorWhileWaiting(JenkinsRule j) throws Exception { + LockableResourcesManager.get().createResource("shared-resource"); + + // p1 acquires the lock and holds it via a semaphore + WorkflowJob p1 = j.createProject(WorkflowJob.class, "holder"); + p1.setDefinition(new CpsFlowDefinition( + m( + "pipeline {", + " agent none", + " stages {", + " stage('work') {", + " options {", + " lock resource: 'shared-resource'", + " }", + " agent any", + " steps {", + " semaphore 'hold-lock'", + " }", + " }", + " }", + "}"), + true)); + WorkflowRun b1 = p1.scheduleBuild2(0).waitForStart(); + SemaphoreStep.waitForStart("hold-lock/1", b1); + + // p2 also wants the lock — it should wait WITHOUT consuming an executor + WorkflowJob p2 = j.createProject(WorkflowJob.class, "waiter"); + p2.setDefinition(new CpsFlowDefinition( + m( + "pipeline {", + " agent none", + " stages {", + " stage('work') {", + " options {", + " lock resource: 'shared-resource'", + " }", + " agent any", + " steps {", + " echo 'waiter got the lock'", + " }", + " }", + " }", + "}"), + true)); + WorkflowRun b2 = p2.scheduleBuild2(0).waitForStart(); + j.waitForMessage("[Resource: shared-resource] is not free, waiting for execution", b2); + + // b2 is waiting for the lock — it uses only a flyweight (CPS) executor, + // not a real build executor, so other jobs can still run. + assertFlyweightOrNoExecutor(b2); + + // An unrelated job must be able to run while b2 waits + WorkflowJob p3 = j.createProject(WorkflowJob.class, "unrelated"); + p3.setDefinition(new CpsFlowDefinition( + m( + "pipeline {", + " agent any", + " stages {", + " stage('work') {", + " steps {", + " echo 'unrelated job ran'", + " }", + " }", + " }", + "}"), + true)); + j.assertBuildStatusSuccess(p3.scheduleBuild2(0)); + + // Release the lock holder + SemaphoreStep.success("hold-lock/1", null); + j.assertBuildStatusSuccess(j.waitForCompletion(b1)); + j.assertBuildStatusSuccess(j.waitForCompletion(b2)); + j.assertLogContains("waiter got the lock", b2); + } + + /** + * Same pattern with a label-based lock: {@code agent none} at pipeline + * level, stage-level lock + agent. Waiter does not block an executor. + */ + @Test + @Issue("JENKINS-67083") + void agentNoneWithStageLevelLabelLockDoesNotConsumeExecutor(JenkinsRule j) throws Exception { + LockableResourcesManager.get().createResourceWithLabel("board1", "hw"); + + WorkflowJob p1 = j.createProject(WorkflowJob.class, "holder"); + p1.setDefinition(new CpsFlowDefinition( + m( + "pipeline {", + " agent none", + " stages {", + " stage('work') {", + " options {", + " lock label: 'hw', resource: null, quantity: 1", + " }", + " agent any", + " steps {", + " semaphore 'hold-lock'", + " }", + " }", + " }", + "}"), + true)); + WorkflowRun b1 = p1.scheduleBuild2(0).waitForStart(); + SemaphoreStep.waitForStart("hold-lock/1", b1); + + WorkflowJob p2 = j.createProject(WorkflowJob.class, "waiter"); + p2.setDefinition(new CpsFlowDefinition( + m( + "pipeline {", + " agent none", + " stages {", + " stage('work') {", + " options {", + " lock label: 'hw', resource: null, quantity: 1", + " }", + " agent any", + " steps {", + " echo 'waiter acquired hw lock'", + " }", + " }", + " }", + "}"), + true)); + WorkflowRun b2 = p2.scheduleBuild2(0).waitForStart(); + j.waitForMessage("[Label: hw, Quantity: 1] is not free, waiting for execution", b2); + assertFlyweightOrNoExecutor(b2); + + SemaphoreStep.success("hold-lock/1", null); + j.assertBuildStatusSuccess(j.waitForCompletion(b1)); + j.assertBuildStatusSuccess(j.waitForCompletion(b2)); + j.assertLogContains("waiter acquired hw lock", b2); + } + + /** + * Demonstrates that preparation stages run without the lock, and the lock + * is only acquired for the stages that need it (no executor held during + * the wait). + */ + @Test + @Issue("JENKINS-67083") + void preparationStageRunsWithoutLock(JenkinsRule j) throws Exception { + LockableResourcesManager.get().createResource("deploy-target"); + + WorkflowJob p = j.createProject(WorkflowJob.class, "efficient"); + p.setDefinition(new CpsFlowDefinition( + m( + "pipeline {", + " agent none", + " stages {", + " stage('Build') {", + " agent any", + " steps {", + " echo 'building without lock'", + " }", + " }", + " stage('Deploy') {", + " options {", + " lock resource: 'deploy-target'", + " }", + " agent any", + " steps {", + " echo 'deploying with lock'", + " }", + " }", + " }", + "}"), + true)); + WorkflowRun b = p.scheduleBuild2(0).waitForStart(); + j.assertBuildStatusSuccess(j.waitForCompletion(b)); + j.assertLogContains("building without lock", b); + j.assertLogContains("deploying with lock", b); + j.assertLogContains("Lock acquired on [Resource: deploy-target]", b); + } + + /** + * Wrapping multiple stages in a parent stage with {@code options { lock }} + * and {@code agent} holds the lock across all nested stages but still does + * not block an executor while waiting. + */ + @Test + @Issue("JENKINS-67083") + void nestedStagesShareLockWithoutExecutorStarvation(JenkinsRule j) throws Exception { + LockableResourcesManager.get().createResource("env-resource"); + + // holder keeps the lock busy + WorkflowJob holder = j.createProject(WorkflowJob.class, "holder"); + holder.setDefinition(new CpsFlowDefinition( + m( + "pipeline {", + " agent none", + " stages {", + " stage('hold') {", + " options {", + " lock resource: 'env-resource'", + " }", + " agent any", + " steps {", + " semaphore 'hold-lock'", + " }", + " }", + " }", + "}"), + true)); + WorkflowRun bHolder = holder.scheduleBuild2(0).waitForStart(); + SemaphoreStep.waitForStart("hold-lock/1", bHolder); + + // waiter uses nested stages + WorkflowJob waiter = j.createProject(WorkflowJob.class, "waiter"); + waiter.setDefinition(new CpsFlowDefinition( + m( + "pipeline {", + " agent none", + " stages {", + " stage('Deploy and Test') {", + " options {", + " lock resource: 'env-resource'", + " }", + " agent any", + " stages {", + " stage('Deploy') {", + " steps {", + " echo 'deploying'", + " }", + " }", + " stage('Integration Test') {", + " steps {", + " echo 'testing'", + " }", + " }", + " }", + " }", + " }", + "}"), + true)); + WorkflowRun bWaiter = waiter.scheduleBuild2(0).waitForStart(); + j.waitForMessage("[Resource: env-resource] is not free, waiting for execution", bWaiter); + assertFlyweightOrNoExecutor(bWaiter); + + SemaphoreStep.success("hold-lock/1", null); + j.assertBuildStatusSuccess(j.waitForCompletion(bHolder)); + j.assertBuildStatusSuccess(j.waitForCompletion(bWaiter)); + j.assertLogContains("deploying", bWaiter); + j.assertLogContains("testing", bWaiter); + } + + /** + * Asserts that the build either has no executor or only a flyweight + * executor (number {@code -1}). A flyweight executor runs the CPS + * engine and does not consume a build-executor slot. + */ + private static void assertFlyweightOrNoExecutor(WorkflowRun run) { + Executor exec = run.getExecutor(); + assertTrue( + exec == null || exec.getNumber() == -1, + "Build should only use a flyweight executor while waiting for a lock, " + + "but holds heavyweight executor #" + + (exec == null ? "null" : exec.getNumber())); + } + + private static String m(String... lines) { + return Joiner.on('\n').join(lines); + } +} diff --git a/src/test/java/org/jenkins/plugins/lockableresources/NodesMirrorTest.java b/src/test/java/org/jenkins/plugins/lockableresources/NodesMirrorTest.java index a501befec..554453cad 100644 --- a/src/test/java/org/jenkins/plugins/lockableresources/NodesMirrorTest.java +++ b/src/test/java/org/jenkins/plugins/lockableresources/NodesMirrorTest.java @@ -18,7 +18,7 @@ class NodesMirrorTest { private static final Logger LOGGER = - Logger.getLogger(org.jenkins.plugins.lockableresources.NodesMirror.class.getName()); + Logger.getLogger(org.jenkins.plugins.lockableresources.nodes.NodesMirror.class.getName()); @Test void mirror_few_nodes(JenkinsRule j) throws Exception {