fix: run always()/cancelled() and post steps correctly on cancellation (#1043)

## Problem When a workflow run is **cancelled**, the runner diverged from GitHub Actions: - Main-stage `if: ${{ always() }}` / `if: ${{ cancelled() }}` steps **did not run** at all (unlike a *failed* run, where they do). - `if: ${{ cancelled() }}` was **structurally impossible** to satisfy — it could never be true. GitHub Actions runs `always()`/`cancelled()` steps (and post cleanup) even when a job is cancelled. This runner only honored that for action *post* steps (since #1016), leaving main-stage cleanup steps silently skipped. ## Root causes (both in `act/`) 1. **`getJobContext()`** derived the job status purely from step conclusions, so it could only ever return `"success"` or `"failure"`. Since `cancelled()` checks `Job.Status == "cancelled"`, it was impossible — and `success()` stayed *true* on a cancelled run, so the wrong `if` branch was taken everywhere. 2. **The main step pipeline** is chained with `Executor.Then()`, which short-circuits the moment `ctx.Err() != nil`. Once the server cancelled, every not-yet-started main step (including `always()` ones) was abandoned. ## Fix - Add a per-`RunContext` `jobCancelled` flag + `markCancelled()`. `getJobContext()` now reports `"cancelled"` (taking precedence over success/failure), so `cancelled()`/`always()` are true and `success()`/`failure()` are false — matching GitHub's "only always()/cancelled() run on cancel" semantics. - Replace the plain main-steps pipeline with `newMainStepsExecutor`. On interruption (`context.Canceled` from a server cancel, or `context.DeadlineExceeded` from the job timeout) it marks the job cancelled and runs the **remaining** steps under a fresh context (`context.WithoutCancel` + bounded timeout) so `always()`/`cancelled()` steps run for cleanup, while default-`success()` steps skip themselves. The original interrupt error is still propagated upward. - Backstop `markCancelled()` in the post-step `Finally` so cancellations landing outside the main loop still surface the cancelled status to post steps. Pre-steps keep normal short-circuit behavior, and reporting (`RESULT_CANCELLED`) is untouched — that remains handled by #1016. ## Reporting semantics (unchanged by this PR) | Run state | failing post/`always()` step reported as | | --------- | --------------------------------------------------------------------------------------------- | | Normal | **FAILURE** | | Timeout | **FAILURE** (deadline path preserves the job-error container) | | Cancelled | **CANCELLED** — cancellation wins; the failing step is logged but doesn't flip the conclusion | The new `always()` path runs under `context.WithoutCancel`, so the job-error container is preserved — a failing `always()` step records its failure at step level and does not panic in `SetJobError`. Fixes #657 Reviewed-on: https://gitea.com/gitea/runner/pulls/1043 Reviewed-by: Zettat123 <39446+zettat123@noreply.gitea.com>
2026-06-29 07:34:26 +00:00 · 2026-06-28 20:54:00 +00:00
parent 99bc50d538
commit e22d3fa263
3 changed files with 435 additions and 30 deletions
--- a/act/runner/cancellation_test.go
+++ b/act/runner/cancellation_test.go
@@ -0,0 +1,285 @@
+// Copyright 2026 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package runner
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"gitea.com/gitea/runner/act/common"
+	"gitea.com/gitea/runner/act/exprparser"
+	"gitea.com/gitea/runner/act/model"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"go.yaml.in/yaml/v4"
+)
+
+// TestCancelledJobStatusEnablesAlwaysAndCancelledSteps verifies that once a job is
+// cancelled, getJobContext reports the "cancelled" status so the step `if` functions
+// evaluate the way GitHub Actions does: cancelled()/always() are true, success()/failure()
+// are false. A step that defaults to success() is therefore skipped while an always() step
+// still runs. Before the fix the status could only ever be success/failure, so cancelled()
+// was structurally impossible and cancel-only cleanup steps never ran.
+func TestCancelledJobStatusEnablesAlwaysAndCancelledSteps(t *testing.T) {
+	rc := createIfTestRunContext(map[string]*model.Job{
+		"job1": createJob(t, `runs-on: ubuntu-latest`, ""),
+	})
+	rc.markCancelled()
+
+	// The core fix: the job status context now reports "cancelled" instead of being
+	// pinned to success/failure.
+	jobCtx := rc.getJobContext()
+	require.Equal(t, "cancelled", jobCtx.Status)
+
+	// Feed that status through the step-context expression functions, which is what a
+	// step `if` evaluates. On a cancelled job only always()/cancelled() are true.
+	interp := exprparser.NewInterpeter(
+		&exprparser.EvaluationEnvironment{Job: jobCtx},
+		exprparser.Config{Context: "step"},
+	)
+	for expr, want := range map[string]bool{
+		"cancelled()":  true,
+		"always()":     true,
+		"success()":    false,
+		"failure()":    false,
+		"!cancelled()": false,
+	} {
+		got, err := interp.Evaluate(expr, exprparser.DefaultStatusCheckNone)
+		require.NoErrorf(t, err, "Evaluate(%q)", expr)
+		assert.Equalf(t, want, got, "Evaluate(%q) on a cancelled job", expr)
+	}
+
+	// A step without an `if` defaults to success() and must be skipped on cancel,
+	// while an `if: always()` step must still run.
+	disabled, err := interp.Evaluate("", exprparser.DefaultStatusCheckSuccess)
+	require.NoError(t, err)
+	assert.Equal(t, false, disabled, "default-success step must be skipped on a cancelled job")
+
+	enabled, err := interp.Evaluate("always()", exprparser.DefaultStatusCheckSuccess)
+	require.NoError(t, err)
+	assert.Equal(t, true, enabled, "`if: always()` step must run on a cancelled job")
+}
+
+// TestMainStepsExecutorRunsAlwaysStepsAfterCancel verifies that newMainStepsExecutor does
+// not abandon the remaining steps when the run is cancelled mid-pipeline. The later step
+// still runs (so a main-stage always() step is reached), it runs under a fresh,
+// non-cancelled context, and the job is marked cancelled. The interrupt error is still
+// propagated so callers up the chain see the cancellation.
+func TestMainStepsExecutorRunsAlwaysStepsAfterCancel(t *testing.T) {
+	rc := createIfTestRunContext(map[string]*model.Job{
+		"job1": createJob(t, `runs-on: ubuntu-latest`, ""),
+	})
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	var ran []string
+	var laterStepCtxErr error
+	steps := []common.Executor{
+		func(_ context.Context) error {
+			ran = append(ran, "step1")
+			cancel() // server cancellation lands while step1 runs
+			return nil
+		},
+		func(c context.Context) error {
+			ran = append(ran, "always-step")
+			laterStepCtxErr = c.Err()
+			return nil
+		},
+	}
+
+	err := newMainStepsExecutor(rc, steps)(ctx)
+
+	require.ErrorIs(t, err, context.Canceled, "interrupt error is propagated")
+	assert.Equal(t, []string{"step1", "always-step"}, ran, "the always() step still runs after cancel")
+	require.NoError(t, laterStepCtxErr, "remaining steps run under a fresh, non-cancelled context")
+	assert.True(t, rc.jobCancelled, "the job is marked cancelled")
+}
+
+// TestMainStepsExecutorMarksFailedOnTimeoutBetweenSteps guards the timeout path's symmetry with the cancel path.
+// When the job deadline (timeout-minutes) lands in the gap between two steps, the job must be marked as failed (not cancelled),
+// so always()/failure() cleanup steps run while default success() steps skip, and so the timed-out job is not reported as success.
+func TestMainStepsExecutorMarksFailedOnTimeoutBetweenSteps(t *testing.T) {
+	rc := createIfTestRunContext(map[string]*model.Job{
+		"job1": createJob(t, `runs-on: ubuntu-latest`, ""),
+	})
+
+	// A short deadline that we let elapse between steps, so no step records the error itself.
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
+	defer cancel()
+
+	var ran []string
+	var laterStepCtxErr error
+	steps := []common.Executor{
+		func(c context.Context) error {
+			ran = append(ran, "step1")
+			// Block until the job deadline elapses, then return cleanly: the interrupt lands in the loop's between-steps check, not inside a step.
+			<-c.Done()
+			return nil
+		},
+		func(c context.Context) error {
+			ran = append(ran, "always-step")
+			laterStepCtxErr = c.Err()
+			return nil
+		},
+	}
+
+	err := newMainStepsExecutor(rc, steps)(ctx)
+
+	require.ErrorIs(t, err, context.DeadlineExceeded, "the timeout error is propagated")
+	assert.Equal(t, []string{"step1", "always-step"}, ran, "the always() step still runs after a timeout")
+	require.NoError(t, laterStepCtxErr, "remaining steps run under a fresh, non-expired context")
+	assert.True(t, rc.jobFailed, "a job timeout marks the job failed")
+	assert.False(t, rc.jobCancelled, "a timeout is not a cancellation")
+
+	// The status the real main-step `if` evaluation sees: "failure", so default success() steps skip while always()/failure() steps run.
+	assert.Equal(t, "failure", rc.getJobContext().Status)
+}
+
+// TestStepsExecutorRunsMainStepsAfterPreCancel verifies that a cancellation landing during the
+// pre phase does not abandon the main steps: newStepsExecutor still runs the main-steps executor,
+// so a main-stage always()/cancelled() step is reached (under a fresh, non-cancelled context),
+// the job is marked cancelled, and the cancellation is propagated. Before the fix the `.Then(...)`
+// short-circuit skipped the main steps entirely when a pre step was cancelled.
+func TestStepsExecutorRunsMainStepsAfterPreCancel(t *testing.T) {
+	rc := createIfTestRunContext(map[string]*model.Job{
+		"job1": createJob(t, `runs-on: ubuntu-latest`, ""),
+	})
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	var ran []string
+	var mainStepCtxErr error
+	preSteps := []common.Executor{
+		func(_ context.Context) error {
+			ran = append(ran, "pre1")
+			cancel() // server cancellation lands during the pre phase
+			return nil
+		},
+	}
+	steps := []common.Executor{
+		func(c context.Context) error {
+			ran = append(ran, "always-step")
+			mainStepCtxErr = c.Err()
+			return nil
+		},
+	}
+
+	err := newStepsExecutor(rc, preSteps, steps)(ctx)
+
+	require.ErrorIs(t, err, context.Canceled, "the cancellation is propagated")
+	assert.Equal(t, []string{"pre1", "always-step"}, ran, "the main always() step runs after a pre-phase cancel")
+	require.NoError(t, mainStepCtxErr, "the main step runs under a fresh, non-cancelled context")
+	assert.True(t, rc.jobCancelled, "the job is marked cancelled")
+}
+
+// TestStepsExecutorRunsMainStepsAfterPreFailure verifies that a failing pre step does not abandon
+// the main steps: they still run (so a main-stage always()/failure() step is reached), and the
+// pre-step error is propagated so the job is reported as failed. The main steps' own `if`
+// evaluation is what skips success()-default steps, so running them here is safe.
+func TestStepsExecutorRunsMainStepsAfterPreFailure(t *testing.T) {
+	rc := createIfTestRunContext(map[string]*model.Job{
+		"job1": createJob(t, `runs-on: ubuntu-latest`, ""),
+	})
+
+	var ran []string
+	preSteps := []common.Executor{
+		func(_ context.Context) error {
+			ran = append(ran, "pre1")
+			return assert.AnError
+		},
+	}
+	steps := []common.Executor{
+		func(_ context.Context) error {
+			ran = append(ran, "always-step")
+			return nil
+		},
+	}
+
+	err := newStepsExecutor(rc, preSteps, steps)(context.Background())
+
+	require.ErrorIs(t, err, assert.AnError, "the pre-step error is propagated")
+	assert.Equal(t, []string{"pre1", "always-step"}, ran, "the main always() step runs after a pre-step failure")
+	assert.False(t, rc.jobCancelled, "a pre-step failure is not a cancellation")
+}
+
+// TestPreStepFailureAffectsMainStepIfStatus verifies the status path used by real
+// main-step `if` evaluation. A pre-step failure is not present in StepResults, so
+// recording only the context job error is not enough: getJobContext must also report
+// failure so success()-default main steps skip and failure() steps run.
+func TestPreStepFailureAffectsMainStepIfStatus(t *testing.T) {
+	rc := createIfTestRunContext(map[string]*model.Job{
+		"job1": createJob(t, `runs-on: ubuntu-latest`, ""),
+	})
+	ctx := common.WithJobErrorContainer(context.Background())
+
+	reportStepError(ctx, rc, assert.AnError)
+
+	assert.Equal(t, "failure", rc.getJobContext().Status)
+	require.ErrorIs(t, common.JobError(ctx), assert.AnError)
+
+	defaultStep := &stepRun{
+		RunContext: rc,
+		Step:       &model.Step{ID: "default-step"},
+		env:        map[string]string{},
+	}
+	defaultEnabled, err := isStepEnabled(ctx, defaultStep.getIfExpression(ctx, stepStageMain), defaultStep, stepStageMain)
+	require.NoError(t, err)
+	assert.False(t, defaultEnabled, "default success() main step must skip after a pre-step failure")
+
+	failureStep := &stepRun{
+		RunContext: rc,
+		Step: &model.Step{
+			ID: "failure-step",
+			If: yaml.Node{Value: "failure()"},
+		},
+		env: map[string]string{},
+	}
+	failureEnabled, err := isStepEnabled(ctx, failureStep.getIfExpression(ctx, stepStageMain), failureStep, stepStageMain)
+	require.NoError(t, err)
+	assert.True(t, failureEnabled, "failure() main step must run after a pre-step failure")
+}
+
+// TestPostStepsContextCancelledIsUsableForFailingStep guards against a panic: post/cleanup
+// steps run on a context derived from the cancelled job context, and a failing post step
+// records its error via common.SetJobError. If that derived context lacks a job-error container,
+// SetJobError dereferences a nil map and panics. The post context must therefore be detached
+// from cancellation (so the steps run) yet still carry a usable error container.
+func TestPostStepsContextCancelledIsUsableForFailingStep(t *testing.T) {
+	cancelled, cancel := context.WithCancel(common.WithJobErrorContainer(context.Background()))
+	cancel()
+	require.ErrorIs(t, cancelled.Err(), context.Canceled)
+
+	postCtx, done := postStepsContext(cancelled)
+	defer done()
+
+	// Detached from cancellation, so the post steps actually run.
+	require.NoError(t, postCtx.Err(), "post context must not be cancelled")
+
+	// A failing post step records its error instead of panicking.
+	require.NotPanics(t, func() {
+		common.SetJobError(postCtx, assert.AnError)
+	}, "a failing post step must not panic on the cancel path")
+	assert.ErrorIs(t, common.JobError(postCtx), assert.AnError)
+}
+
+// TestPostStepsContextDeadlinePreservesJobError verifies the job-timeout path keeps the original
+// job-error container (via context.WithoutCancel), so the timeout failure and any post-step error
+// survive into the post phase and the job is still reported as failed.
+func TestPostStepsContextDeadlinePreservesJobError(t *testing.T) {
+	base := common.WithJobErrorContainer(context.Background())
+	common.SetJobError(base, assert.AnError)
+	expired, cancel := context.WithDeadline(base, time.Now().Add(-time.Hour))
+	defer cancel()
+	require.ErrorIs(t, expired.Err(), context.DeadlineExceeded)
+
+	postCtx, done := postStepsContext(expired)
+	defer done()
+
+	require.NoError(t, postCtx.Err(), "post context must not carry the expired deadline")
+	assert.ErrorIs(t, common.JobError(postCtx), assert.AnError, "the timeout job error must be preserved")
+}
--- a/act/runner/job_executor.go
+++ b/act/runner/job_executor.go
@@ -58,9 +58,10 @@ type jobInfo interface {

 // reportStepError emits the GitHub Actions ##[error] annotation and records
 // the error against the job so the job is reported as failed.
-func reportStepError(ctx context.Context, err error) {
+func reportStepError(ctx context.Context, rc *RunContext, err error) {
 	common.Logger(ctx).Errorf("##[error]%v", err)
 	common.SetJobError(ctx, err)
+	rc.markFailed()
 }

 func newJobExecutor(info jobInfo, sf stepFactory, rc *RunContext) common.Executor {
@@ -118,9 +119,9 @@ func newJobExecutor(info jobInfo, sf stepFactory, rc *RunContext) common.Executo
 			rc.CurrentStepIndex = stepIdx
 			preErr := preExec(ctx)
 			if preErr != nil {
-				reportStepError(ctx, preErr)
+				reportStepError(ctx, rc, preErr)
 			} else if ctx.Err() != nil {
-				reportStepError(ctx, ctx.Err())
+				reportStepError(ctx, rc, ctx.Err())
 			}
 			return preErr
 		}))
@@ -130,9 +131,9 @@ func newJobExecutor(info jobInfo, sf stepFactory, rc *RunContext) common.Executo
 			rc.CurrentStepIndex = stepIdx
 			err := stepExec(ctx)
 			if err != nil {
-				reportStepError(ctx, err)
+				reportStepError(ctx, rc, err)
 			} else if ctx.Err() != nil {
-				reportStepError(ctx, ctx.Err())
+				reportStepError(ctx, rc, ctx.Err())
 			}
 			return nil
 		}))
@@ -142,9 +143,9 @@ func newJobExecutor(info jobInfo, sf stepFactory, rc *RunContext) common.Executo
 			rc.CurrentStepIndex = stepIdx
 			err := postFn(ctx)
 			if err != nil {
-				reportStepError(ctx, err)
+				reportStepError(ctx, rc, err)
 			} else if ctx.Err() != nil {
-				reportStepError(ctx, ctx.Err())
+				reportStepError(ctx, rc, ctx.Err())
 			}
 			return err
 		})
@@ -159,7 +160,12 @@ func newJobExecutor(info jobInfo, sf stepFactory, rc *RunContext) common.Executo
 	postExecutor = postExecutor.Finally(func(ctx context.Context) error {
 		jobError := common.JobError(ctx)
 		var err error
-		if rc.Config.AutoRemove || jobError == nil {
+		// jobError == nil keeps a failed job's container alive for post-mortem debugging when
+		// AutoRemove is off (the act-CLI --rm behavior; the shipped runner always sets
+		// AutoRemove). A cancelled run is not a failure to inspect, and the cancel-path post
+		// context now carries its own error container so a failing post step makes jobError
+		// non-nil — OR in rc.jobCancelled so cancellation still always tears the container down.
+		if rc.Config.AutoRemove || jobError == nil || rc.jobCancelled {
 			// always allow 1 min for stopping and removing the runner, even if we were cancelled
 			ctx, cancel := context.WithTimeout(common.WithLogger(context.Background(), common.Logger(ctx)), time.Minute)
 			defer cancel()
@@ -198,35 +204,107 @@ func newJobExecutor(info jobInfo, sf stepFactory, rc *RunContext) common.Executo
 		return err
 	})

-	pipeline := make([]common.Executor, 0)
-	pipeline = append(pipeline, preSteps...)
-	pipeline = append(pipeline, steps...)
+	stepsExecutor := newStepsExecutor(rc, preSteps, steps)

-	return common.NewPipelineExecutor(info.startContainer(), common.NewPipelineExecutor(pipeline...).
+	return common.NewPipelineExecutor(info.startContainer(), stepsExecutor.
 		Finally(func(ctx context.Context) error {
-			var cancel context.CancelFunc
-			switch ctx.Err() {
-			case context.Canceled:
-				// in case of an aborted run, we still should execute the
-				// post steps to allow cleanup.
-				ctx, cancel = context.WithTimeout(common.WithLogger(context.Background(), common.Logger(ctx)), 5*time.Minute)
-				defer cancel()
-			case context.DeadlineExceeded:
-				// The job hit its timeout-minutes. Without a fresh context the post
-				// steps would run against the already-expired context and be skipped,
-				// so cleanup post-hooks (e.g. actions/checkout post, cache save) would
-				// not run. Derive the context with WithoutCancel so the new deadline
-				// applies but the job error state is preserved: the job is still
-				// reported as failed and container teardown matches a normal failure.
-				ctx, cancel = context.WithTimeout(context.WithoutCancel(ctx), 5*time.Minute)
-				defer cancel()
-			}
-			return postExecutor(ctx)
+			// Record an interrupt (backstop for interrupts that land outside the main
+			// step loop) so the post steps observe the cancelled/failed job status.
+			rc.markInterrupted(ctx.Err())
+			postCtx, cancel := postStepsContext(ctx)
+			defer cancel()
+			return postExecutor(postCtx)
 		}).
 		Finally(info.interpolateOutputs()).
 		Finally(info.closeContainer()))
 }

+// postStepsContext derives the context used to run the job's post/cleanup steps from the
+// finished main-pipeline context. Cleanup has to run even when the run was interrupted, so the
+// returned context always carries a fresh bounded deadline and is never itself cancelled.
+//
+//   - context.Canceled (server cancel): detach from the cancelled context via a fresh root so
+//     the post steps can run.
+//   - context.DeadlineExceeded (job timeout): detach the deadline with WithoutCancel, which
+//     keeps the original values — including the job-error container — so the timeout failure and
+//     any post-step error are preserved and the job is still reported as failed.
+//   - otherwise: run on the live context unchanged.
+func postStepsContext(ctx context.Context) (context.Context, context.CancelFunc) {
+	switch ctx.Err() {
+	case context.Canceled:
+		// The cancelled context is abandoned for a fresh root, which drops the job-error
+		// container installed at the job root. Re-attach a fresh one so a failing post step
+		// records its error via SetJobError instead of panicking on a nil container.
+		return context.WithTimeout(common.WithJobErrorContainer(common.WithLogger(context.Background(), common.Logger(ctx))), 5*time.Minute)
+	case context.DeadlineExceeded:
+		return context.WithTimeout(context.WithoutCancel(ctx), 5*time.Minute)
+	default:
+		return ctx, func() {}
+	}
+}
+
+// newStepsExecutor sequences the job's pre steps and main steps.
+//
+// The pre steps run as a normal pipeline that short-circuits on the first failure or
+// cancellation. The main-steps executor then runs unconditionally — even if a pre step failed
+// or the job was interrupted — so always()/cancelled()/failure() main steps still run, mirroring
+// GitHub Actions. This is safe because each main step re-evaluates its own `if` (a pre-step
+// failure flips the expression job status to failure, so success()-default steps skip) and
+// newMainStepsExecutor detaches from an interrupted context before running the remaining steps.
+//
+// A pre-step failure or interrupt is still propagated so the job is reported with the correct
+// conclusion; the pre error takes precedence since it happened first.
+func newStepsExecutor(rc *RunContext, preSteps, steps []common.Executor) common.Executor {
+	preExecutor := common.NewPipelineExecutor(preSteps...)
+	mainExecutor := newMainStepsExecutor(rc, steps)
+	return func(ctx context.Context) error {
+		preErr := preExecutor(ctx)
+		mainErr := mainExecutor(ctx)
+		if preErr != nil {
+			return preErr
+		}
+		return mainErr
+	}
+}
+
+// newMainStepsExecutor runs the job's main-stage step executors in order. Unlike a plain
+// pipeline, an interruption (context.Canceled from a server cancel, or context.DeadlineExceeded
+// from the job timeout) does not abandon the remaining steps: it marks the job cancelled when
+// appropriate and keeps iterating under a fresh, bounded context so steps whose `if` still
+// evaluates true — always() and cancelled() — run for cleanup, mirroring GitHub Actions. Steps
+// that default to success() skip themselves because success() is false once the job is no longer
+// successful. The main-step wrappers report their own errors and return nil, so the loop drives
+// step ordering off the context, not return values.
+func newMainStepsExecutor(rc *RunContext, steps []common.Executor) common.Executor {
+	return func(ctx context.Context) error {
+		for i, step := range steps {
+			if ctx.Err() != nil {
+				return runMainStepsAfterInterrupt(ctx, rc, steps[i:])
+			}
+			_ = step(ctx)
+		}
+		// An interrupt can land during the final step, after the loop's last context
+		// check; record it so the post steps still observe the cancelled/failed status.
+		rc.markInterrupted(ctx.Err())
+		return nil
+	}
+}
+
+// runMainStepsAfterInterrupt runs the remaining main steps after the job context was cancelled or
+// timed out. It detaches from the interrupted context (keeping its values: logger and job error)
+// and applies a fresh deadline so always()/cancelled() steps run to completion. The original
+// interrupt error is returned so callers up the chain still see the job as cancelled/timed out.
+func runMainStepsAfterInterrupt(ctx context.Context, rc *RunContext, steps []common.Executor) error {
+	interruptErr := ctx.Err()
+	rc.markInterrupted(interruptErr)
+	freshCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), 5*time.Minute)
+	defer cancel()
+	for _, step := range steps {
+		_ = step(freshCtx)
+	}
+	return interruptErr
+}
+
 func setJobResult(ctx context.Context, info jobInfo, rc *RunContext, success bool) {
 	logger := common.Logger(ctx)

--- a/act/runner/run_context.go
+++ b/act/runner/run_context.go
@@ -73,6 +73,39 @@ type RunContext struct {
 	// captured before execution so each matrix combo interpolates from the originals rather
 	// than from a sibling's already-resolved values written into the shared Job.Outputs.
 	outputTemplate map[string]string
+	// jobCancelled records that this job's run was cancelled (context.Canceled). It makes
+	// getJobContext report the "cancelled" status so cancelled()/always() evaluate the way
+	// GitHub Actions does, letting cleanup and always() steps run while normal steps skip.
+	jobCancelled bool
+	// jobFailed records failures outside normal main-step results, such as action pre-step
+	// failures. Those failures must still make success() false and failure() true for later
+	// main-step if evaluation.
+	jobFailed bool
+}
+
+// markCancelled flags the job as cancelled so subsequent step `if` evaluations and the
+// job status context observe the "cancelled" state.
+func (rc *RunContext) markCancelled() {
+	rc.jobCancelled = true
+}
+
+// markFailed flags the job as failed so subsequent step `if` evaluations observe
+// failure even when the error happened outside a main step result.
+func (rc *RunContext) markFailed() {
+	rc.jobFailed = true
+}
+
+// markInterrupted records the job's interruption status from a context error so later step `if` evaluations and the job result observe it,
+// keeping the timeout path symmetric with the cancel path:
+//   - context.Canceled (server cancel) marks the job cancelled, matching GitHub's "only always()/cancelled() run on cancel".
+//   - context.DeadlineExceeded (job timeout-minutes) marks the job failed, matching the "Timeout -> FAILURE" reporting semantics.
+func (rc *RunContext) markInterrupted(err error) {
+	switch {
+	case errors.Is(err, context.Canceled):
+		rc.markCancelled()
+	case errors.Is(err, context.DeadlineExceeded):
+		rc.markFailed()
+	}
 }

 func (rc *RunContext) AddMask(mask string) {
@@ -904,12 +937,21 @@ func trimToLen(s string, l int) string {

 func (rc *RunContext) getJobContext() *model.JobContext {
 	jobStatus := "success"
+	if rc.jobFailed {
+		jobStatus = "failure"
+	}
 	for _, stepStatus := range rc.StepResults {
 		if stepStatus.Conclusion == model.StepStatusFailure {
 			jobStatus = "failure"
 			break
 		}
 	}
+	// A cancelled run takes precedence over success/failure so cancelled() is true and
+	// success()/failure() are false, matching GitHub Actions: on cancellation only
+	// always() and cancelled() steps run.
+	if rc.jobCancelled {
+		jobStatus = "cancelled"
+	}
 	return &model.JobContext{
 		Status: jobStatus,
 	}