mirror of
https://gitea.com/gitea/act_runner.git
synced 2026-06-29 07:34:26 +00:00
fix: run always()/cancelled() and post steps correctly on cancellation (#1043)
## Problem
When a workflow run is **cancelled**, the runner diverged from GitHub Actions:
- Main-stage `if: ${{ always() }}` / `if: ${{ cancelled() }}` steps **did not run** at all (unlike a *failed* run, where they do).
- `if: ${{ cancelled() }}` was **structurally impossible** to satisfy — it could never be true.
GitHub Actions runs `always()`/`cancelled()` steps (and post cleanup) even when a job is cancelled. This runner only honored that for action *post* steps (since #1016), leaving main-stage cleanup steps silently skipped.
## Root causes (both in `act/`)
1. **`getJobContext()`** derived the job status purely from step conclusions, so it could only ever return `"success"` or `"failure"`. Since `cancelled()` checks `Job.Status == "cancelled"`, it was impossible — and `success()` stayed *true* on a cancelled run, so the wrong `if` branch was taken everywhere.
2. **The main step pipeline** is chained with `Executor.Then()`, which short-circuits the moment `ctx.Err() != nil`. Once the server cancelled, every not-yet-started main step (including `always()` ones) was abandoned.
## Fix
- Add a per-`RunContext` `jobCancelled` flag + `markCancelled()`. `getJobContext()` now reports `"cancelled"` (taking precedence over success/failure), so `cancelled()`/`always()` are true and `success()`/`failure()` are false — matching GitHub's "only always()/cancelled() run on cancel" semantics.
- Replace the plain main-steps pipeline with `newMainStepsExecutor`. On interruption (`context.Canceled` from a server cancel, or `context.DeadlineExceeded` from the job timeout) it marks the job cancelled and runs the **remaining** steps under a fresh context (`context.WithoutCancel` + bounded timeout) so `always()`/`cancelled()` steps run for cleanup, while default-`success()` steps skip themselves. The original interrupt error is still propagated upward.
- Backstop `markCancelled()` in the post-step `Finally` so cancellations landing outside the main loop still surface the cancelled status to post steps.
Pre-steps keep normal short-circuit behavior, and reporting (`RESULT_CANCELLED`) is untouched — that remains handled by #1016.
## Reporting semantics (unchanged by this PR)
| Run state | failing post/`always()` step reported as |
| --------- | --------------------------------------------------------------------------------------------- |
| Normal | **FAILURE** |
| Timeout | **FAILURE** (deadline path preserves the job-error container) |
| Cancelled | **CANCELLED** — cancellation wins; the failing step is logged but doesn't flip the conclusion |
The new `always()` path runs under `context.WithoutCancel`, so the job-error container is preserved — a failing `always()` step records its failure at step level and does not panic in `SetJobError`.
Fixes #657
Reviewed-on: https://gitea.com/gitea/runner/pulls/1043
Reviewed-by: Zettat123 <39446+zettat123@noreply.gitea.com>
This commit is contained in:
285
act/runner/cancellation_test.go
Normal file
285
act/runner/cancellation_test.go
Normal file
@@ -0,0 +1,285 @@
|
||||
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package runner
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gitea.com/gitea/runner/act/common"
|
||||
"gitea.com/gitea/runner/act/exprparser"
|
||||
"gitea.com/gitea/runner/act/model"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"go.yaml.in/yaml/v4"
|
||||
)
|
||||
|
||||
// TestCancelledJobStatusEnablesAlwaysAndCancelledSteps verifies that once a job is
|
||||
// cancelled, getJobContext reports the "cancelled" status so the step `if` functions
|
||||
// evaluate the way GitHub Actions does: cancelled()/always() are true, success()/failure()
|
||||
// are false. A step that defaults to success() is therefore skipped while an always() step
|
||||
// still runs. Before the fix the status could only ever be success/failure, so cancelled()
|
||||
// was structurally impossible and cancel-only cleanup steps never ran.
|
||||
func TestCancelledJobStatusEnablesAlwaysAndCancelledSteps(t *testing.T) {
|
||||
rc := createIfTestRunContext(map[string]*model.Job{
|
||||
"job1": createJob(t, `runs-on: ubuntu-latest`, ""),
|
||||
})
|
||||
rc.markCancelled()
|
||||
|
||||
// The core fix: the job status context now reports "cancelled" instead of being
|
||||
// pinned to success/failure.
|
||||
jobCtx := rc.getJobContext()
|
||||
require.Equal(t, "cancelled", jobCtx.Status)
|
||||
|
||||
// Feed that status through the step-context expression functions, which is what a
|
||||
// step `if` evaluates. On a cancelled job only always()/cancelled() are true.
|
||||
interp := exprparser.NewInterpeter(
|
||||
&exprparser.EvaluationEnvironment{Job: jobCtx},
|
||||
exprparser.Config{Context: "step"},
|
||||
)
|
||||
for expr, want := range map[string]bool{
|
||||
"cancelled()": true,
|
||||
"always()": true,
|
||||
"success()": false,
|
||||
"failure()": false,
|
||||
"!cancelled()": false,
|
||||
} {
|
||||
got, err := interp.Evaluate(expr, exprparser.DefaultStatusCheckNone)
|
||||
require.NoErrorf(t, err, "Evaluate(%q)", expr)
|
||||
assert.Equalf(t, want, got, "Evaluate(%q) on a cancelled job", expr)
|
||||
}
|
||||
|
||||
// A step without an `if` defaults to success() and must be skipped on cancel,
|
||||
// while an `if: always()` step must still run.
|
||||
disabled, err := interp.Evaluate("", exprparser.DefaultStatusCheckSuccess)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, false, disabled, "default-success step must be skipped on a cancelled job")
|
||||
|
||||
enabled, err := interp.Evaluate("always()", exprparser.DefaultStatusCheckSuccess)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, true, enabled, "`if: always()` step must run on a cancelled job")
|
||||
}
|
||||
|
||||
// TestMainStepsExecutorRunsAlwaysStepsAfterCancel verifies that newMainStepsExecutor does
|
||||
// not abandon the remaining steps when the run is cancelled mid-pipeline. The later step
|
||||
// still runs (so a main-stage always() step is reached), it runs under a fresh,
|
||||
// non-cancelled context, and the job is marked cancelled. The interrupt error is still
|
||||
// propagated so callers up the chain see the cancellation.
|
||||
func TestMainStepsExecutorRunsAlwaysStepsAfterCancel(t *testing.T) {
|
||||
rc := createIfTestRunContext(map[string]*model.Job{
|
||||
"job1": createJob(t, `runs-on: ubuntu-latest`, ""),
|
||||
})
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
var ran []string
|
||||
var laterStepCtxErr error
|
||||
steps := []common.Executor{
|
||||
func(_ context.Context) error {
|
||||
ran = append(ran, "step1")
|
||||
cancel() // server cancellation lands while step1 runs
|
||||
return nil
|
||||
},
|
||||
func(c context.Context) error {
|
||||
ran = append(ran, "always-step")
|
||||
laterStepCtxErr = c.Err()
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
err := newMainStepsExecutor(rc, steps)(ctx)
|
||||
|
||||
require.ErrorIs(t, err, context.Canceled, "interrupt error is propagated")
|
||||
assert.Equal(t, []string{"step1", "always-step"}, ran, "the always() step still runs after cancel")
|
||||
require.NoError(t, laterStepCtxErr, "remaining steps run under a fresh, non-cancelled context")
|
||||
assert.True(t, rc.jobCancelled, "the job is marked cancelled")
|
||||
}
|
||||
|
||||
// TestMainStepsExecutorMarksFailedOnTimeoutBetweenSteps guards the timeout path's symmetry with the cancel path.
|
||||
// When the job deadline (timeout-minutes) lands in the gap between two steps, the job must be marked as failed (not cancelled),
|
||||
// so always()/failure() cleanup steps run while default success() steps skip, and so the timed-out job is not reported as success.
|
||||
func TestMainStepsExecutorMarksFailedOnTimeoutBetweenSteps(t *testing.T) {
|
||||
rc := createIfTestRunContext(map[string]*model.Job{
|
||||
"job1": createJob(t, `runs-on: ubuntu-latest`, ""),
|
||||
})
|
||||
|
||||
// A short deadline that we let elapse between steps, so no step records the error itself.
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
|
||||
defer cancel()
|
||||
|
||||
var ran []string
|
||||
var laterStepCtxErr error
|
||||
steps := []common.Executor{
|
||||
func(c context.Context) error {
|
||||
ran = append(ran, "step1")
|
||||
// Block until the job deadline elapses, then return cleanly: the interrupt lands in the loop's between-steps check, not inside a step.
|
||||
<-c.Done()
|
||||
return nil
|
||||
},
|
||||
func(c context.Context) error {
|
||||
ran = append(ran, "always-step")
|
||||
laterStepCtxErr = c.Err()
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
err := newMainStepsExecutor(rc, steps)(ctx)
|
||||
|
||||
require.ErrorIs(t, err, context.DeadlineExceeded, "the timeout error is propagated")
|
||||
assert.Equal(t, []string{"step1", "always-step"}, ran, "the always() step still runs after a timeout")
|
||||
require.NoError(t, laterStepCtxErr, "remaining steps run under a fresh, non-expired context")
|
||||
assert.True(t, rc.jobFailed, "a job timeout marks the job failed")
|
||||
assert.False(t, rc.jobCancelled, "a timeout is not a cancellation")
|
||||
|
||||
// The status the real main-step `if` evaluation sees: "failure", so default success() steps skip while always()/failure() steps run.
|
||||
assert.Equal(t, "failure", rc.getJobContext().Status)
|
||||
}
|
||||
|
||||
// TestStepsExecutorRunsMainStepsAfterPreCancel verifies that a cancellation landing during the
|
||||
// pre phase does not abandon the main steps: newStepsExecutor still runs the main-steps executor,
|
||||
// so a main-stage always()/cancelled() step is reached (under a fresh, non-cancelled context),
|
||||
// the job is marked cancelled, and the cancellation is propagated. Before the fix the `.Then(...)`
|
||||
// short-circuit skipped the main steps entirely when a pre step was cancelled.
|
||||
func TestStepsExecutorRunsMainStepsAfterPreCancel(t *testing.T) {
|
||||
rc := createIfTestRunContext(map[string]*model.Job{
|
||||
"job1": createJob(t, `runs-on: ubuntu-latest`, ""),
|
||||
})
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
var ran []string
|
||||
var mainStepCtxErr error
|
||||
preSteps := []common.Executor{
|
||||
func(_ context.Context) error {
|
||||
ran = append(ran, "pre1")
|
||||
cancel() // server cancellation lands during the pre phase
|
||||
return nil
|
||||
},
|
||||
}
|
||||
steps := []common.Executor{
|
||||
func(c context.Context) error {
|
||||
ran = append(ran, "always-step")
|
||||
mainStepCtxErr = c.Err()
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
err := newStepsExecutor(rc, preSteps, steps)(ctx)
|
||||
|
||||
require.ErrorIs(t, err, context.Canceled, "the cancellation is propagated")
|
||||
assert.Equal(t, []string{"pre1", "always-step"}, ran, "the main always() step runs after a pre-phase cancel")
|
||||
require.NoError(t, mainStepCtxErr, "the main step runs under a fresh, non-cancelled context")
|
||||
assert.True(t, rc.jobCancelled, "the job is marked cancelled")
|
||||
}
|
||||
|
||||
// TestStepsExecutorRunsMainStepsAfterPreFailure verifies that a failing pre step does not abandon
|
||||
// the main steps: they still run (so a main-stage always()/failure() step is reached), and the
|
||||
// pre-step error is propagated so the job is reported as failed. The main steps' own `if`
|
||||
// evaluation is what skips success()-default steps, so running them here is safe.
|
||||
func TestStepsExecutorRunsMainStepsAfterPreFailure(t *testing.T) {
|
||||
rc := createIfTestRunContext(map[string]*model.Job{
|
||||
"job1": createJob(t, `runs-on: ubuntu-latest`, ""),
|
||||
})
|
||||
|
||||
var ran []string
|
||||
preSteps := []common.Executor{
|
||||
func(_ context.Context) error {
|
||||
ran = append(ran, "pre1")
|
||||
return assert.AnError
|
||||
},
|
||||
}
|
||||
steps := []common.Executor{
|
||||
func(_ context.Context) error {
|
||||
ran = append(ran, "always-step")
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
err := newStepsExecutor(rc, preSteps, steps)(context.Background())
|
||||
|
||||
require.ErrorIs(t, err, assert.AnError, "the pre-step error is propagated")
|
||||
assert.Equal(t, []string{"pre1", "always-step"}, ran, "the main always() step runs after a pre-step failure")
|
||||
assert.False(t, rc.jobCancelled, "a pre-step failure is not a cancellation")
|
||||
}
|
||||
|
||||
// TestPreStepFailureAffectsMainStepIfStatus verifies the status path used by real
|
||||
// main-step `if` evaluation. A pre-step failure is not present in StepResults, so
|
||||
// recording only the context job error is not enough: getJobContext must also report
|
||||
// failure so success()-default main steps skip and failure() steps run.
|
||||
func TestPreStepFailureAffectsMainStepIfStatus(t *testing.T) {
|
||||
rc := createIfTestRunContext(map[string]*model.Job{
|
||||
"job1": createJob(t, `runs-on: ubuntu-latest`, ""),
|
||||
})
|
||||
ctx := common.WithJobErrorContainer(context.Background())
|
||||
|
||||
reportStepError(ctx, rc, assert.AnError)
|
||||
|
||||
assert.Equal(t, "failure", rc.getJobContext().Status)
|
||||
require.ErrorIs(t, common.JobError(ctx), assert.AnError)
|
||||
|
||||
defaultStep := &stepRun{
|
||||
RunContext: rc,
|
||||
Step: &model.Step{ID: "default-step"},
|
||||
env: map[string]string{},
|
||||
}
|
||||
defaultEnabled, err := isStepEnabled(ctx, defaultStep.getIfExpression(ctx, stepStageMain), defaultStep, stepStageMain)
|
||||
require.NoError(t, err)
|
||||
assert.False(t, defaultEnabled, "default success() main step must skip after a pre-step failure")
|
||||
|
||||
failureStep := &stepRun{
|
||||
RunContext: rc,
|
||||
Step: &model.Step{
|
||||
ID: "failure-step",
|
||||
If: yaml.Node{Value: "failure()"},
|
||||
},
|
||||
env: map[string]string{},
|
||||
}
|
||||
failureEnabled, err := isStepEnabled(ctx, failureStep.getIfExpression(ctx, stepStageMain), failureStep, stepStageMain)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, failureEnabled, "failure() main step must run after a pre-step failure")
|
||||
}
|
||||
|
||||
// TestPostStepsContextCancelledIsUsableForFailingStep guards against a panic: post/cleanup
|
||||
// steps run on a context derived from the cancelled job context, and a failing post step
|
||||
// records its error via common.SetJobError. If that derived context lacks a job-error container,
|
||||
// SetJobError dereferences a nil map and panics. The post context must therefore be detached
|
||||
// from cancellation (so the steps run) yet still carry a usable error container.
|
||||
func TestPostStepsContextCancelledIsUsableForFailingStep(t *testing.T) {
|
||||
cancelled, cancel := context.WithCancel(common.WithJobErrorContainer(context.Background()))
|
||||
cancel()
|
||||
require.ErrorIs(t, cancelled.Err(), context.Canceled)
|
||||
|
||||
postCtx, done := postStepsContext(cancelled)
|
||||
defer done()
|
||||
|
||||
// Detached from cancellation, so the post steps actually run.
|
||||
require.NoError(t, postCtx.Err(), "post context must not be cancelled")
|
||||
|
||||
// A failing post step records its error instead of panicking.
|
||||
require.NotPanics(t, func() {
|
||||
common.SetJobError(postCtx, assert.AnError)
|
||||
}, "a failing post step must not panic on the cancel path")
|
||||
assert.ErrorIs(t, common.JobError(postCtx), assert.AnError)
|
||||
}
|
||||
|
||||
// TestPostStepsContextDeadlinePreservesJobError verifies the job-timeout path keeps the original
|
||||
// job-error container (via context.WithoutCancel), so the timeout failure and any post-step error
|
||||
// survive into the post phase and the job is still reported as failed.
|
||||
func TestPostStepsContextDeadlinePreservesJobError(t *testing.T) {
|
||||
base := common.WithJobErrorContainer(context.Background())
|
||||
common.SetJobError(base, assert.AnError)
|
||||
expired, cancel := context.WithDeadline(base, time.Now().Add(-time.Hour))
|
||||
defer cancel()
|
||||
require.ErrorIs(t, expired.Err(), context.DeadlineExceeded)
|
||||
|
||||
postCtx, done := postStepsContext(expired)
|
||||
defer done()
|
||||
|
||||
require.NoError(t, postCtx.Err(), "post context must not carry the expired deadline")
|
||||
assert.ErrorIs(t, common.JobError(postCtx), assert.AnError, "the timeout job error must be preserved")
|
||||
}
|
||||
@@ -58,9 +58,10 @@ type jobInfo interface {
|
||||
|
||||
// reportStepError emits the GitHub Actions ##[error] annotation and records
|
||||
// the error against the job so the job is reported as failed.
|
||||
func reportStepError(ctx context.Context, err error) {
|
||||
func reportStepError(ctx context.Context, rc *RunContext, err error) {
|
||||
common.Logger(ctx).Errorf("##[error]%v", err)
|
||||
common.SetJobError(ctx, err)
|
||||
rc.markFailed()
|
||||
}
|
||||
|
||||
func newJobExecutor(info jobInfo, sf stepFactory, rc *RunContext) common.Executor {
|
||||
@@ -118,9 +119,9 @@ func newJobExecutor(info jobInfo, sf stepFactory, rc *RunContext) common.Executo
|
||||
rc.CurrentStepIndex = stepIdx
|
||||
preErr := preExec(ctx)
|
||||
if preErr != nil {
|
||||
reportStepError(ctx, preErr)
|
||||
reportStepError(ctx, rc, preErr)
|
||||
} else if ctx.Err() != nil {
|
||||
reportStepError(ctx, ctx.Err())
|
||||
reportStepError(ctx, rc, ctx.Err())
|
||||
}
|
||||
return preErr
|
||||
}))
|
||||
@@ -130,9 +131,9 @@ func newJobExecutor(info jobInfo, sf stepFactory, rc *RunContext) common.Executo
|
||||
rc.CurrentStepIndex = stepIdx
|
||||
err := stepExec(ctx)
|
||||
if err != nil {
|
||||
reportStepError(ctx, err)
|
||||
reportStepError(ctx, rc, err)
|
||||
} else if ctx.Err() != nil {
|
||||
reportStepError(ctx, ctx.Err())
|
||||
reportStepError(ctx, rc, ctx.Err())
|
||||
}
|
||||
return nil
|
||||
}))
|
||||
@@ -142,9 +143,9 @@ func newJobExecutor(info jobInfo, sf stepFactory, rc *RunContext) common.Executo
|
||||
rc.CurrentStepIndex = stepIdx
|
||||
err := postFn(ctx)
|
||||
if err != nil {
|
||||
reportStepError(ctx, err)
|
||||
reportStepError(ctx, rc, err)
|
||||
} else if ctx.Err() != nil {
|
||||
reportStepError(ctx, ctx.Err())
|
||||
reportStepError(ctx, rc, ctx.Err())
|
||||
}
|
||||
return err
|
||||
})
|
||||
@@ -159,7 +160,12 @@ func newJobExecutor(info jobInfo, sf stepFactory, rc *RunContext) common.Executo
|
||||
postExecutor = postExecutor.Finally(func(ctx context.Context) error {
|
||||
jobError := common.JobError(ctx)
|
||||
var err error
|
||||
if rc.Config.AutoRemove || jobError == nil {
|
||||
// jobError == nil keeps a failed job's container alive for post-mortem debugging when
|
||||
// AutoRemove is off (the act-CLI --rm behavior; the shipped runner always sets
|
||||
// AutoRemove). A cancelled run is not a failure to inspect, and the cancel-path post
|
||||
// context now carries its own error container so a failing post step makes jobError
|
||||
// non-nil — OR in rc.jobCancelled so cancellation still always tears the container down.
|
||||
if rc.Config.AutoRemove || jobError == nil || rc.jobCancelled {
|
||||
// always allow 1 min for stopping and removing the runner, even if we were cancelled
|
||||
ctx, cancel := context.WithTimeout(common.WithLogger(context.Background(), common.Logger(ctx)), time.Minute)
|
||||
defer cancel()
|
||||
@@ -198,35 +204,107 @@ func newJobExecutor(info jobInfo, sf stepFactory, rc *RunContext) common.Executo
|
||||
return err
|
||||
})
|
||||
|
||||
pipeline := make([]common.Executor, 0)
|
||||
pipeline = append(pipeline, preSteps...)
|
||||
pipeline = append(pipeline, steps...)
|
||||
stepsExecutor := newStepsExecutor(rc, preSteps, steps)
|
||||
|
||||
return common.NewPipelineExecutor(info.startContainer(), common.NewPipelineExecutor(pipeline...).
|
||||
return common.NewPipelineExecutor(info.startContainer(), stepsExecutor.
|
||||
Finally(func(ctx context.Context) error {
|
||||
var cancel context.CancelFunc
|
||||
switch ctx.Err() {
|
||||
case context.Canceled:
|
||||
// in case of an aborted run, we still should execute the
|
||||
// post steps to allow cleanup.
|
||||
ctx, cancel = context.WithTimeout(common.WithLogger(context.Background(), common.Logger(ctx)), 5*time.Minute)
|
||||
defer cancel()
|
||||
case context.DeadlineExceeded:
|
||||
// The job hit its timeout-minutes. Without a fresh context the post
|
||||
// steps would run against the already-expired context and be skipped,
|
||||
// so cleanup post-hooks (e.g. actions/checkout post, cache save) would
|
||||
// not run. Derive the context with WithoutCancel so the new deadline
|
||||
// applies but the job error state is preserved: the job is still
|
||||
// reported as failed and container teardown matches a normal failure.
|
||||
ctx, cancel = context.WithTimeout(context.WithoutCancel(ctx), 5*time.Minute)
|
||||
defer cancel()
|
||||
}
|
||||
return postExecutor(ctx)
|
||||
// Record an interrupt (backstop for interrupts that land outside the main
|
||||
// step loop) so the post steps observe the cancelled/failed job status.
|
||||
rc.markInterrupted(ctx.Err())
|
||||
postCtx, cancel := postStepsContext(ctx)
|
||||
defer cancel()
|
||||
return postExecutor(postCtx)
|
||||
}).
|
||||
Finally(info.interpolateOutputs()).
|
||||
Finally(info.closeContainer()))
|
||||
}
|
||||
|
||||
// postStepsContext derives the context used to run the job's post/cleanup steps from the
|
||||
// finished main-pipeline context. Cleanup has to run even when the run was interrupted, so the
|
||||
// returned context always carries a fresh bounded deadline and is never itself cancelled.
|
||||
//
|
||||
// - context.Canceled (server cancel): detach from the cancelled context via a fresh root so
|
||||
// the post steps can run.
|
||||
// - context.DeadlineExceeded (job timeout): detach the deadline with WithoutCancel, which
|
||||
// keeps the original values — including the job-error container — so the timeout failure and
|
||||
// any post-step error are preserved and the job is still reported as failed.
|
||||
// - otherwise: run on the live context unchanged.
|
||||
func postStepsContext(ctx context.Context) (context.Context, context.CancelFunc) {
|
||||
switch ctx.Err() {
|
||||
case context.Canceled:
|
||||
// The cancelled context is abandoned for a fresh root, which drops the job-error
|
||||
// container installed at the job root. Re-attach a fresh one so a failing post step
|
||||
// records its error via SetJobError instead of panicking on a nil container.
|
||||
return context.WithTimeout(common.WithJobErrorContainer(common.WithLogger(context.Background(), common.Logger(ctx))), 5*time.Minute)
|
||||
case context.DeadlineExceeded:
|
||||
return context.WithTimeout(context.WithoutCancel(ctx), 5*time.Minute)
|
||||
default:
|
||||
return ctx, func() {}
|
||||
}
|
||||
}
|
||||
|
||||
// newStepsExecutor sequences the job's pre steps and main steps.
|
||||
//
|
||||
// The pre steps run as a normal pipeline that short-circuits on the first failure or
|
||||
// cancellation. The main-steps executor then runs unconditionally — even if a pre step failed
|
||||
// or the job was interrupted — so always()/cancelled()/failure() main steps still run, mirroring
|
||||
// GitHub Actions. This is safe because each main step re-evaluates its own `if` (a pre-step
|
||||
// failure flips the expression job status to failure, so success()-default steps skip) and
|
||||
// newMainStepsExecutor detaches from an interrupted context before running the remaining steps.
|
||||
//
|
||||
// A pre-step failure or interrupt is still propagated so the job is reported with the correct
|
||||
// conclusion; the pre error takes precedence since it happened first.
|
||||
func newStepsExecutor(rc *RunContext, preSteps, steps []common.Executor) common.Executor {
|
||||
preExecutor := common.NewPipelineExecutor(preSteps...)
|
||||
mainExecutor := newMainStepsExecutor(rc, steps)
|
||||
return func(ctx context.Context) error {
|
||||
preErr := preExecutor(ctx)
|
||||
mainErr := mainExecutor(ctx)
|
||||
if preErr != nil {
|
||||
return preErr
|
||||
}
|
||||
return mainErr
|
||||
}
|
||||
}
|
||||
|
||||
// newMainStepsExecutor runs the job's main-stage step executors in order. Unlike a plain
|
||||
// pipeline, an interruption (context.Canceled from a server cancel, or context.DeadlineExceeded
|
||||
// from the job timeout) does not abandon the remaining steps: it marks the job cancelled when
|
||||
// appropriate and keeps iterating under a fresh, bounded context so steps whose `if` still
|
||||
// evaluates true — always() and cancelled() — run for cleanup, mirroring GitHub Actions. Steps
|
||||
// that default to success() skip themselves because success() is false once the job is no longer
|
||||
// successful. The main-step wrappers report their own errors and return nil, so the loop drives
|
||||
// step ordering off the context, not return values.
|
||||
func newMainStepsExecutor(rc *RunContext, steps []common.Executor) common.Executor {
|
||||
return func(ctx context.Context) error {
|
||||
for i, step := range steps {
|
||||
if ctx.Err() != nil {
|
||||
return runMainStepsAfterInterrupt(ctx, rc, steps[i:])
|
||||
}
|
||||
_ = step(ctx)
|
||||
}
|
||||
// An interrupt can land during the final step, after the loop's last context
|
||||
// check; record it so the post steps still observe the cancelled/failed status.
|
||||
rc.markInterrupted(ctx.Err())
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// runMainStepsAfterInterrupt runs the remaining main steps after the job context was cancelled or
|
||||
// timed out. It detaches from the interrupted context (keeping its values: logger and job error)
|
||||
// and applies a fresh deadline so always()/cancelled() steps run to completion. The original
|
||||
// interrupt error is returned so callers up the chain still see the job as cancelled/timed out.
|
||||
func runMainStepsAfterInterrupt(ctx context.Context, rc *RunContext, steps []common.Executor) error {
|
||||
interruptErr := ctx.Err()
|
||||
rc.markInterrupted(interruptErr)
|
||||
freshCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), 5*time.Minute)
|
||||
defer cancel()
|
||||
for _, step := range steps {
|
||||
_ = step(freshCtx)
|
||||
}
|
||||
return interruptErr
|
||||
}
|
||||
|
||||
func setJobResult(ctx context.Context, info jobInfo, rc *RunContext, success bool) {
|
||||
logger := common.Logger(ctx)
|
||||
|
||||
|
||||
@@ -73,6 +73,39 @@ type RunContext struct {
|
||||
// captured before execution so each matrix combo interpolates from the originals rather
|
||||
// than from a sibling's already-resolved values written into the shared Job.Outputs.
|
||||
outputTemplate map[string]string
|
||||
// jobCancelled records that this job's run was cancelled (context.Canceled). It makes
|
||||
// getJobContext report the "cancelled" status so cancelled()/always() evaluate the way
|
||||
// GitHub Actions does, letting cleanup and always() steps run while normal steps skip.
|
||||
jobCancelled bool
|
||||
// jobFailed records failures outside normal main-step results, such as action pre-step
|
||||
// failures. Those failures must still make success() false and failure() true for later
|
||||
// main-step if evaluation.
|
||||
jobFailed bool
|
||||
}
|
||||
|
||||
// markCancelled flags the job as cancelled so subsequent step `if` evaluations and the
|
||||
// job status context observe the "cancelled" state.
|
||||
func (rc *RunContext) markCancelled() {
|
||||
rc.jobCancelled = true
|
||||
}
|
||||
|
||||
// markFailed flags the job as failed so subsequent step `if` evaluations observe
|
||||
// failure even when the error happened outside a main step result.
|
||||
func (rc *RunContext) markFailed() {
|
||||
rc.jobFailed = true
|
||||
}
|
||||
|
||||
// markInterrupted records the job's interruption status from a context error so later step `if` evaluations and the job result observe it,
|
||||
// keeping the timeout path symmetric with the cancel path:
|
||||
// - context.Canceled (server cancel) marks the job cancelled, matching GitHub's "only always()/cancelled() run on cancel".
|
||||
// - context.DeadlineExceeded (job timeout-minutes) marks the job failed, matching the "Timeout -> FAILURE" reporting semantics.
|
||||
func (rc *RunContext) markInterrupted(err error) {
|
||||
switch {
|
||||
case errors.Is(err, context.Canceled):
|
||||
rc.markCancelled()
|
||||
case errors.Is(err, context.DeadlineExceeded):
|
||||
rc.markFailed()
|
||||
}
|
||||
}
|
||||
|
||||
func (rc *RunContext) AddMask(mask string) {
|
||||
@@ -904,12 +937,21 @@ func trimToLen(s string, l int) string {
|
||||
|
||||
func (rc *RunContext) getJobContext() *model.JobContext {
|
||||
jobStatus := "success"
|
||||
if rc.jobFailed {
|
||||
jobStatus = "failure"
|
||||
}
|
||||
for _, stepStatus := range rc.StepResults {
|
||||
if stepStatus.Conclusion == model.StepStatusFailure {
|
||||
jobStatus = "failure"
|
||||
break
|
||||
}
|
||||
}
|
||||
// A cancelled run takes precedence over success/failure so cancelled() is true and
|
||||
// success()/failure() are false, matching GitHub Actions: on cancellation only
|
||||
// always() and cancelled() steps run.
|
||||
if rc.jobCancelled {
|
||||
jobStatus = "cancelled"
|
||||
}
|
||||
return &model.JobContext{
|
||||
Status: jobStatus,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user