mirror of
https://gitea.com/gitea/act_runner.git
synced 2026-06-29 15:44:42 +00:00
## Problem
When a workflow run is **cancelled**, the runner diverged from GitHub Actions:
- Main-stage `if: ${{ always() }}` / `if: ${{ cancelled() }}` steps **did not run** at all (unlike a *failed* run, where they do).
- `if: ${{ cancelled() }}` was **structurally impossible** to satisfy — it could never be true.
GitHub Actions runs `always()`/`cancelled()` steps (and post cleanup) even when a job is cancelled. This runner only honored that for action *post* steps (since #1016), leaving main-stage cleanup steps silently skipped.
## Root causes (both in `act/`)
1. **`getJobContext()`** derived the job status purely from step conclusions, so it could only ever return `"success"` or `"failure"`. Since `cancelled()` checks `Job.Status == "cancelled"`, it was impossible — and `success()` stayed *true* on a cancelled run, so the wrong `if` branch was taken everywhere.
2. **The main step pipeline** is chained with `Executor.Then()`, which short-circuits the moment `ctx.Err() != nil`. Once the server cancelled, every not-yet-started main step (including `always()` ones) was abandoned.
## Fix
- Add a per-`RunContext` `jobCancelled` flag + `markCancelled()`. `getJobContext()` now reports `"cancelled"` (taking precedence over success/failure), so `cancelled()`/`always()` are true and `success()`/`failure()` are false — matching GitHub's "only always()/cancelled() run on cancel" semantics.
- Replace the plain main-steps pipeline with `newMainStepsExecutor`. On interruption (`context.Canceled` from a server cancel, or `context.DeadlineExceeded` from the job timeout) it marks the job cancelled and runs the **remaining** steps under a fresh context (`context.WithoutCancel` + bounded timeout) so `always()`/`cancelled()` steps run for cleanup, while default-`success()` steps skip themselves. The original interrupt error is still propagated upward.
- Backstop `markCancelled()` in the post-step `Finally` so cancellations landing outside the main loop still surface the cancelled status to post steps.
Pre-steps keep normal short-circuit behavior, and reporting (`RESULT_CANCELLED`) is untouched — that remains handled by #1016.
## Reporting semantics (unchanged by this PR)
| Run state | failing post/`always()` step reported as |
| --------- | --------------------------------------------------------------------------------------------- |
| Normal | **FAILURE** |
| Timeout | **FAILURE** (deadline path preserves the job-error container) |
| Cancelled | **CANCELLED** — cancellation wins; the failing step is logged but doesn't flip the conclusion |
The new `always()` path runs under `context.WithoutCancel`, so the job-error container is preserved — a failing `always()` step records its failure at step level and does not panic in `SetJobError`.
Fixes #657
Reviewed-on: https://gitea.com/gitea/runner/pulls/1043
Reviewed-by: Zettat123 <39446+zettat123@noreply.gitea.com>
286 lines
12 KiB
Go
286 lines
12 KiB
Go
// Copyright 2026 The Gitea Authors. All rights reserved.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
package runner
|
|
|
|
import (
|
|
"context"
|
|
"testing"
|
|
"time"
|
|
|
|
"gitea.com/gitea/runner/act/common"
|
|
"gitea.com/gitea/runner/act/exprparser"
|
|
"gitea.com/gitea/runner/act/model"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
"go.yaml.in/yaml/v4"
|
|
)
|
|
|
|
// TestCancelledJobStatusEnablesAlwaysAndCancelledSteps verifies that once a job is
|
|
// cancelled, getJobContext reports the "cancelled" status so the step `if` functions
|
|
// evaluate the way GitHub Actions does: cancelled()/always() are true, success()/failure()
|
|
// are false. A step that defaults to success() is therefore skipped while an always() step
|
|
// still runs. Before the fix the status could only ever be success/failure, so cancelled()
|
|
// was structurally impossible and cancel-only cleanup steps never ran.
|
|
func TestCancelledJobStatusEnablesAlwaysAndCancelledSteps(t *testing.T) {
|
|
rc := createIfTestRunContext(map[string]*model.Job{
|
|
"job1": createJob(t, `runs-on: ubuntu-latest`, ""),
|
|
})
|
|
rc.markCancelled()
|
|
|
|
// The core fix: the job status context now reports "cancelled" instead of being
|
|
// pinned to success/failure.
|
|
jobCtx := rc.getJobContext()
|
|
require.Equal(t, "cancelled", jobCtx.Status)
|
|
|
|
// Feed that status through the step-context expression functions, which is what a
|
|
// step `if` evaluates. On a cancelled job only always()/cancelled() are true.
|
|
interp := exprparser.NewInterpeter(
|
|
&exprparser.EvaluationEnvironment{Job: jobCtx},
|
|
exprparser.Config{Context: "step"},
|
|
)
|
|
for expr, want := range map[string]bool{
|
|
"cancelled()": true,
|
|
"always()": true,
|
|
"success()": false,
|
|
"failure()": false,
|
|
"!cancelled()": false,
|
|
} {
|
|
got, err := interp.Evaluate(expr, exprparser.DefaultStatusCheckNone)
|
|
require.NoErrorf(t, err, "Evaluate(%q)", expr)
|
|
assert.Equalf(t, want, got, "Evaluate(%q) on a cancelled job", expr)
|
|
}
|
|
|
|
// A step without an `if` defaults to success() and must be skipped on cancel,
|
|
// while an `if: always()` step must still run.
|
|
disabled, err := interp.Evaluate("", exprparser.DefaultStatusCheckSuccess)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, false, disabled, "default-success step must be skipped on a cancelled job")
|
|
|
|
enabled, err := interp.Evaluate("always()", exprparser.DefaultStatusCheckSuccess)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, true, enabled, "`if: always()` step must run on a cancelled job")
|
|
}
|
|
|
|
// TestMainStepsExecutorRunsAlwaysStepsAfterCancel verifies that newMainStepsExecutor does
|
|
// not abandon the remaining steps when the run is cancelled mid-pipeline. The later step
|
|
// still runs (so a main-stage always() step is reached), it runs under a fresh,
|
|
// non-cancelled context, and the job is marked cancelled. The interrupt error is still
|
|
// propagated so callers up the chain see the cancellation.
|
|
func TestMainStepsExecutorRunsAlwaysStepsAfterCancel(t *testing.T) {
|
|
rc := createIfTestRunContext(map[string]*model.Job{
|
|
"job1": createJob(t, `runs-on: ubuntu-latest`, ""),
|
|
})
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
var ran []string
|
|
var laterStepCtxErr error
|
|
steps := []common.Executor{
|
|
func(_ context.Context) error {
|
|
ran = append(ran, "step1")
|
|
cancel() // server cancellation lands while step1 runs
|
|
return nil
|
|
},
|
|
func(c context.Context) error {
|
|
ran = append(ran, "always-step")
|
|
laterStepCtxErr = c.Err()
|
|
return nil
|
|
},
|
|
}
|
|
|
|
err := newMainStepsExecutor(rc, steps)(ctx)
|
|
|
|
require.ErrorIs(t, err, context.Canceled, "interrupt error is propagated")
|
|
assert.Equal(t, []string{"step1", "always-step"}, ran, "the always() step still runs after cancel")
|
|
require.NoError(t, laterStepCtxErr, "remaining steps run under a fresh, non-cancelled context")
|
|
assert.True(t, rc.jobCancelled, "the job is marked cancelled")
|
|
}
|
|
|
|
// TestMainStepsExecutorMarksFailedOnTimeoutBetweenSteps guards the timeout path's symmetry with the cancel path.
|
|
// When the job deadline (timeout-minutes) lands in the gap between two steps, the job must be marked as failed (not cancelled),
|
|
// so always()/failure() cleanup steps run while default success() steps skip, and so the timed-out job is not reported as success.
|
|
func TestMainStepsExecutorMarksFailedOnTimeoutBetweenSteps(t *testing.T) {
|
|
rc := createIfTestRunContext(map[string]*model.Job{
|
|
"job1": createJob(t, `runs-on: ubuntu-latest`, ""),
|
|
})
|
|
|
|
// A short deadline that we let elapse between steps, so no step records the error itself.
|
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
|
|
defer cancel()
|
|
|
|
var ran []string
|
|
var laterStepCtxErr error
|
|
steps := []common.Executor{
|
|
func(c context.Context) error {
|
|
ran = append(ran, "step1")
|
|
// Block until the job deadline elapses, then return cleanly: the interrupt lands in the loop's between-steps check, not inside a step.
|
|
<-c.Done()
|
|
return nil
|
|
},
|
|
func(c context.Context) error {
|
|
ran = append(ran, "always-step")
|
|
laterStepCtxErr = c.Err()
|
|
return nil
|
|
},
|
|
}
|
|
|
|
err := newMainStepsExecutor(rc, steps)(ctx)
|
|
|
|
require.ErrorIs(t, err, context.DeadlineExceeded, "the timeout error is propagated")
|
|
assert.Equal(t, []string{"step1", "always-step"}, ran, "the always() step still runs after a timeout")
|
|
require.NoError(t, laterStepCtxErr, "remaining steps run under a fresh, non-expired context")
|
|
assert.True(t, rc.jobFailed, "a job timeout marks the job failed")
|
|
assert.False(t, rc.jobCancelled, "a timeout is not a cancellation")
|
|
|
|
// The status the real main-step `if` evaluation sees: "failure", so default success() steps skip while always()/failure() steps run.
|
|
assert.Equal(t, "failure", rc.getJobContext().Status)
|
|
}
|
|
|
|
// TestStepsExecutorRunsMainStepsAfterPreCancel verifies that a cancellation landing during the
|
|
// pre phase does not abandon the main steps: newStepsExecutor still runs the main-steps executor,
|
|
// so a main-stage always()/cancelled() step is reached (under a fresh, non-cancelled context),
|
|
// the job is marked cancelled, and the cancellation is propagated. Before the fix the `.Then(...)`
|
|
// short-circuit skipped the main steps entirely when a pre step was cancelled.
|
|
func TestStepsExecutorRunsMainStepsAfterPreCancel(t *testing.T) {
|
|
rc := createIfTestRunContext(map[string]*model.Job{
|
|
"job1": createJob(t, `runs-on: ubuntu-latest`, ""),
|
|
})
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
var ran []string
|
|
var mainStepCtxErr error
|
|
preSteps := []common.Executor{
|
|
func(_ context.Context) error {
|
|
ran = append(ran, "pre1")
|
|
cancel() // server cancellation lands during the pre phase
|
|
return nil
|
|
},
|
|
}
|
|
steps := []common.Executor{
|
|
func(c context.Context) error {
|
|
ran = append(ran, "always-step")
|
|
mainStepCtxErr = c.Err()
|
|
return nil
|
|
},
|
|
}
|
|
|
|
err := newStepsExecutor(rc, preSteps, steps)(ctx)
|
|
|
|
require.ErrorIs(t, err, context.Canceled, "the cancellation is propagated")
|
|
assert.Equal(t, []string{"pre1", "always-step"}, ran, "the main always() step runs after a pre-phase cancel")
|
|
require.NoError(t, mainStepCtxErr, "the main step runs under a fresh, non-cancelled context")
|
|
assert.True(t, rc.jobCancelled, "the job is marked cancelled")
|
|
}
|
|
|
|
// TestStepsExecutorRunsMainStepsAfterPreFailure verifies that a failing pre step does not abandon
|
|
// the main steps: they still run (so a main-stage always()/failure() step is reached), and the
|
|
// pre-step error is propagated so the job is reported as failed. The main steps' own `if`
|
|
// evaluation is what skips success()-default steps, so running them here is safe.
|
|
func TestStepsExecutorRunsMainStepsAfterPreFailure(t *testing.T) {
|
|
rc := createIfTestRunContext(map[string]*model.Job{
|
|
"job1": createJob(t, `runs-on: ubuntu-latest`, ""),
|
|
})
|
|
|
|
var ran []string
|
|
preSteps := []common.Executor{
|
|
func(_ context.Context) error {
|
|
ran = append(ran, "pre1")
|
|
return assert.AnError
|
|
},
|
|
}
|
|
steps := []common.Executor{
|
|
func(_ context.Context) error {
|
|
ran = append(ran, "always-step")
|
|
return nil
|
|
},
|
|
}
|
|
|
|
err := newStepsExecutor(rc, preSteps, steps)(context.Background())
|
|
|
|
require.ErrorIs(t, err, assert.AnError, "the pre-step error is propagated")
|
|
assert.Equal(t, []string{"pre1", "always-step"}, ran, "the main always() step runs after a pre-step failure")
|
|
assert.False(t, rc.jobCancelled, "a pre-step failure is not a cancellation")
|
|
}
|
|
|
|
// TestPreStepFailureAffectsMainStepIfStatus verifies the status path used by real
|
|
// main-step `if` evaluation. A pre-step failure is not present in StepResults, so
|
|
// recording only the context job error is not enough: getJobContext must also report
|
|
// failure so success()-default main steps skip and failure() steps run.
|
|
func TestPreStepFailureAffectsMainStepIfStatus(t *testing.T) {
|
|
rc := createIfTestRunContext(map[string]*model.Job{
|
|
"job1": createJob(t, `runs-on: ubuntu-latest`, ""),
|
|
})
|
|
ctx := common.WithJobErrorContainer(context.Background())
|
|
|
|
reportStepError(ctx, rc, assert.AnError)
|
|
|
|
assert.Equal(t, "failure", rc.getJobContext().Status)
|
|
require.ErrorIs(t, common.JobError(ctx), assert.AnError)
|
|
|
|
defaultStep := &stepRun{
|
|
RunContext: rc,
|
|
Step: &model.Step{ID: "default-step"},
|
|
env: map[string]string{},
|
|
}
|
|
defaultEnabled, err := isStepEnabled(ctx, defaultStep.getIfExpression(ctx, stepStageMain), defaultStep, stepStageMain)
|
|
require.NoError(t, err)
|
|
assert.False(t, defaultEnabled, "default success() main step must skip after a pre-step failure")
|
|
|
|
failureStep := &stepRun{
|
|
RunContext: rc,
|
|
Step: &model.Step{
|
|
ID: "failure-step",
|
|
If: yaml.Node{Value: "failure()"},
|
|
},
|
|
env: map[string]string{},
|
|
}
|
|
failureEnabled, err := isStepEnabled(ctx, failureStep.getIfExpression(ctx, stepStageMain), failureStep, stepStageMain)
|
|
require.NoError(t, err)
|
|
assert.True(t, failureEnabled, "failure() main step must run after a pre-step failure")
|
|
}
|
|
|
|
// TestPostStepsContextCancelledIsUsableForFailingStep guards against a panic: post/cleanup
|
|
// steps run on a context derived from the cancelled job context, and a failing post step
|
|
// records its error via common.SetJobError. If that derived context lacks a job-error container,
|
|
// SetJobError dereferences a nil map and panics. The post context must therefore be detached
|
|
// from cancellation (so the steps run) yet still carry a usable error container.
|
|
func TestPostStepsContextCancelledIsUsableForFailingStep(t *testing.T) {
|
|
cancelled, cancel := context.WithCancel(common.WithJobErrorContainer(context.Background()))
|
|
cancel()
|
|
require.ErrorIs(t, cancelled.Err(), context.Canceled)
|
|
|
|
postCtx, done := postStepsContext(cancelled)
|
|
defer done()
|
|
|
|
// Detached from cancellation, so the post steps actually run.
|
|
require.NoError(t, postCtx.Err(), "post context must not be cancelled")
|
|
|
|
// A failing post step records its error instead of panicking.
|
|
require.NotPanics(t, func() {
|
|
common.SetJobError(postCtx, assert.AnError)
|
|
}, "a failing post step must not panic on the cancel path")
|
|
assert.ErrorIs(t, common.JobError(postCtx), assert.AnError)
|
|
}
|
|
|
|
// TestPostStepsContextDeadlinePreservesJobError verifies the job-timeout path keeps the original
|
|
// job-error container (via context.WithoutCancel), so the timeout failure and any post-step error
|
|
// survive into the post phase and the job is still reported as failed.
|
|
func TestPostStepsContextDeadlinePreservesJobError(t *testing.T) {
|
|
base := common.WithJobErrorContainer(context.Background())
|
|
common.SetJobError(base, assert.AnError)
|
|
expired, cancel := context.WithDeadline(base, time.Now().Add(-time.Hour))
|
|
defer cancel()
|
|
require.ErrorIs(t, expired.Err(), context.DeadlineExceeded)
|
|
|
|
postCtx, done := postStepsContext(expired)
|
|
defer done()
|
|
|
|
require.NoError(t, postCtx.Err(), "post context must not carry the expired deadline")
|
|
assert.ErrorIs(t, common.JobError(postCtx), assert.AnError, "the timeout job error must be preserved")
|
|
}
|