From 99bc50d538e69018371b461d73bfa6027bb456d7 Mon Sep 17 00:00:00 2001 From: Zettat123 Date: Sun, 28 Jun 2026 20:12:21 +0000 Subject: [PATCH] feat: shallow clone action repositories (#1053) ## Summary When a workflow references a remote action (e.g. `uses: actions/checkout@v4`), the runner clones that repository during job setup. Previously this was always a full clone (every branch and the complete history) even though only a single ref is needed. This PR makes the runner shallow-clone the requested ref by default (`--depth=1`, single branch), falling back to a full clone when a shallow clone fails. Notes: - Existing on-disk caches are reused as-is; there is no forced re-clone on upgrade. ## Changes - A new `runner.action_shallow_clone` option (default `true`) lets operators opt back into full clones. - `cloneAtDepth`: attempt a shallow clone; fall back to a full clone when the shallow clone fails. - Keep a shallow cache cheap on update: fetch the single requested ref at depth 1 and skip `pull`. --------- Co-authored-by: bircni Reviewed-on: https://gitea.com/gitea/runner/pulls/1053 Reviewed-by: Nicolas Co-authored-by: Zettat123 Co-committed-by: Zettat123 --- act/common/git/git.go | 72 ++++++++++++++++++- act/common/git/git_test.go | 91 +++++++++++++++++++++++++ act/runner/reusable_workflow.go | 1 + act/runner/runner.go | 1 + act/runner/step_action_remote.go | 1 + internal/app/run/runner.go | 7 ++ internal/pkg/config/config.example.yaml | 3 + internal/pkg/config/config.go | 5 ++ 8 files changed, 178 insertions(+), 3 deletions(-) diff --git a/act/common/git/git.go b/act/common/git/git.go index 28b17655..243aa5db 100644 --- a/act/common/git/git.go +++ b/act/common/git/git.go @@ -257,6 +257,10 @@ type NewGitCloneExecutorInput struct { Token string OfflineMode bool + // Depth limits the clone/fetch to the given number of commits from the tip of the requested ref. + // 0 for full clone. 
+ Depth int + // For Gitea InsecureSkipTLS bool } @@ -309,7 +313,7 @@ func CloneIfRequired(ctx context.Context, refName plumbing.ReferenceName, input } } - r, err = git.PlainCloneContext(ctx, input.Dir, false, &cloneOptions) + r, err = cloneAtDepth(ctx, input, cloneOptions, logger) if err != nil { logger.Errorf("Unable to clone %v %s: %v", input.URL, refName, err) return nil, false, err @@ -364,6 +368,16 @@ func NewGitCloneExecutor(input NewGitCloneExecutorInput) common.Executor { pullOptions.InsecureSkipTLS = true } + // Action clones only ever need the tip commit, so keep updates of a shallow cache cheap by fetching at depth 1, regardless of its original depth. + // Turning action_shallow_clone off does not convert an existing shallow cache; evict it for a full clone. + shallow := isShallow(r) + if shallow { + fetchOptions.Depth = 1 + if spec, ok := shallowFetchRefSpec(r, input.Ref); ok { + fetchOptions.RefSpecs = []config.RefSpec{spec} + } + } + if !isOfflineMode { err = r.Fetch(&fetchOptions) if err != nil && !errors.Is(err, git.NoErrAlreadyUpToDate) { @@ -431,11 +445,13 @@ func NewGitCloneExecutor(input NewGitCloneExecutorInput) common.Executor { reusedMsg := "" - if !isOfflineMode { + switch { + case !isOfflineMode && !shallow: + // In shallow mode the depth-limited fetch above already advanced the ref. if err = w.Pull(&pullOptions); err != nil && err != git.NoErrAlreadyUpToDate { logger.Debugf("Unable to pull %s: %v", refName, err) } - } else if reused { + case isOfflineMode && reused: reusedMsg = " (reused in offline mode)" } @@ -468,3 +484,53 @@ func NewGitCloneExecutor(input NewGitCloneExecutorInput) common.Executor { return nil } } + +// cloneAtDepth clones input.URL into input.Dir using opts. +// With input.Depth > 0 it first tries a shallow, single-branch clone of input.Ref, falling back to a full clone on error. 
+func cloneAtDepth(ctx context.Context, input NewGitCloneExecutorInput, opts git.CloneOptions, logger log.FieldLogger) (*git.Repository, error) { + if input.Depth > 0 { + for _, refName := range []plumbing.ReferenceName{ + plumbing.NewBranchReferenceName(input.Ref), + plumbing.NewTagReferenceName(input.Ref), + } { + shallowOpts := opts + shallowOpts.Depth = input.Depth + shallowOpts.SingleBranch = true + shallowOpts.ReferenceName = refName + shallowOpts.Tags = git.NoTags + + r, err := git.PlainCloneContext(ctx, input.Dir, false, &shallowOpts) + if err == nil { + return r, nil + } + logger.Debugf("Shallow clone of %s as %s failed: %v", input.URL, refName, err) + if rmErr := os.RemoveAll(input.Dir); rmErr != nil { + return nil, fmt.Errorf("remove partial clone %s: %w", input.Dir, rmErr) + } + } + logger.Debugf("Falling back to a full clone of %s for ref %q", input.URL, input.Ref) + } + + return git.PlainCloneContext(ctx, input.Dir, false, &opts) +} + +// isShallow reports whether the local repository was cloned with a limited depth. +func isShallow(r *git.Repository) bool { + shallows, err := r.Storer.Shallow() + return err == nil && len(shallows) > 0 +} + +// shallowFetchRefSpec returns the single refspec that updates only input.Ref, keeping a shallow clone from re-downloading every branch's history. +// ok is false when the ref is not present locally as a tag or remote-tracking branch, in which case the broad default refspec is used. 
+func shallowFetchRefSpec(r *git.Repository, ref string) (config.RefSpec, bool) { + tagRef := plumbing.NewTagReferenceName(ref) + if _, err := r.Reference(tagRef, false); err == nil { + return config.RefSpec(fmt.Sprintf("+%s:%s", tagRef, tagRef)), true + } + remoteRef := plumbing.NewRemoteReferenceName("origin", ref) + if _, err := r.Reference(remoteRef, false); err == nil { + branchRef := plumbing.NewBranchReferenceName(ref) + return config.RefSpec(fmt.Sprintf("+%s:%s", branchRef, remoteRef)), true + } + return "", false +} diff --git a/act/common/git/git_test.go b/act/common/git/git_test.go index ed40e0c5..e19abc55 100644 --- a/act/common/git/git_test.go +++ b/act/common/git/git_test.go @@ -10,6 +10,7 @@ import ( "os" "os/exec" "path/filepath" + "strconv" "strings" "sync" "syscall" @@ -380,6 +381,96 @@ func TestGitCloneExecutorOfflineMode(t *testing.T) { }) } +func TestGitCloneExecutorShallow(t *testing.T) { + // Build a local "remote" with several commits on main plus a tag, so a full clone would pull noticeably more history than a shallow one. 
+ remoteDir := t.TempDir() + require.NoError(t, gitCmd("init", "--bare", "--initial-branch=main", remoteDir)) + workDir := t.TempDir() + require.NoError(t, gitCmd("clone", remoteDir, workDir)) + require.NoError(t, gitCmd("-C", workDir, "checkout", "-b", "main")) + for _, m := range []string{"c1", "c2", "c3"} { + require.NoError(t, gitCmd("-C", workDir, "commit", "--allow-empty", "-m", m)) + } + require.NoError(t, gitCmd("-C", workDir, "tag", "v1")) + sha := gitRevParse(t, workDir, "HEAD~1") // c2, a SHA that go-git cannot shallow-clone + require.NoError(t, gitCmd("-C", workDir, "push", "-u", "origin", "main")) + require.NoError(t, gitCmd("-C", workDir, "push", "origin", "v1")) + + shallowMarker := func(dir string) string { return filepath.Join(dir, ".git", "shallow") } + + t.Run("branch is cloned shallowly", func(t *testing.T) { + dir := t.TempDir() + require.NoError(t, NewGitCloneExecutor(NewGitCloneExecutorInput{ + URL: remoteDir, Ref: "main", Dir: dir, Depth: 1, + })(t.Context())) + assert.FileExists(t, shallowMarker(dir), "clone should be shallow") + assert.Equal(t, 1, gitRevCount(t, dir), "only the tip commit should be present") + assert.Equal(t, "c3", gitHeadSubject(t, dir)) + }) + + t.Run("tag is cloned shallowly", func(t *testing.T) { + dir := t.TempDir() + require.NoError(t, NewGitCloneExecutor(NewGitCloneExecutorInput{ + URL: remoteDir, Ref: "v1", Dir: dir, Depth: 1, + })(t.Context())) + assert.FileExists(t, shallowMarker(dir), "clone should be shallow") + assert.Equal(t, 1, gitRevCount(t, dir)) + assert.Equal(t, "c3", gitHeadSubject(t, dir)) + }) + + t.Run("SHA falls back to a full clone", func(t *testing.T) { + dir := t.TempDir() + require.NoError(t, NewGitCloneExecutor(NewGitCloneExecutorInput{ + URL: remoteDir, Ref: sha, Dir: dir, Depth: 1, + })(t.Context())) + // go-git cannot shallow-clone a raw SHA, so it falls back to a full clone; the absence of a shallow marker proves the fallback happened. 
+ assert.NoFileExists(t, shallowMarker(dir), "a SHA ref must not produce a shallow clone") + assert.Equal(t, sha, gitRevParse(t, dir, "HEAD")) + }) + + t.Run("moving branch updates while staying shallow", func(t *testing.T) { + dir := t.TempDir() + require.NoError(t, NewGitCloneExecutor(NewGitCloneExecutorInput{ + URL: remoteDir, Ref: "main", Dir: dir, Depth: 1, + })(t.Context())) + require.Equal(t, "c3", gitHeadSubject(t, dir)) + + // Advance main on the remote, then reuse the existing shallow clone. + require.NoError(t, gitCmd("-C", workDir, "commit", "--allow-empty", "-m", "c4")) + require.NoError(t, gitCmd("-C", workDir, "push", "origin", "main")) + + require.NoError(t, NewGitCloneExecutor(NewGitCloneExecutorInput{ + URL: remoteDir, Ref: "main", Dir: dir, Depth: 1, + })(t.Context())) + assert.Equal(t, "c4", gitHeadSubject(t, dir), "reused shallow clone should update to the new tip") + assert.FileExists(t, shallowMarker(dir), "repo should remain shallow after update") + assert.Equal(t, 1, gitRevCount(t, dir)) + }) +} + +func gitRevParse(t *testing.T, dir, rev string) string { + t.Helper() + out, err := exec.Command("git", "-C", dir, "rev-parse", rev).Output() + require.NoError(t, err) + return strings.TrimSpace(string(out)) +} + +func gitRevCount(t *testing.T, dir string) int { + t.Helper() + out, err := exec.Command("git", "-C", dir, "rev-list", "--count", "HEAD").Output() + require.NoError(t, err) + n, err := strconv.Atoi(strings.TrimSpace(string(out))) + require.NoError(t, err) + return n +} + +func gitHeadSubject(t *testing.T, dir string) string { + t.Helper() + out, err := exec.Command("git", "-C", dir, "log", "-1", "--format=%s").Output() + require.NoError(t, err) + return strings.TrimSpace(string(out)) +} + func gitCmd(args ...string) error { cmd := exec.Command("git", args...) 
cmd.Stdout = os.Stdout diff --git a/act/runner/reusable_workflow.go b/act/runner/reusable_workflow.go index e914937b..e1109a4f 100644 --- a/act/runner/reusable_workflow.go +++ b/act/runner/reusable_workflow.go @@ -141,6 +141,7 @@ func cloneRemoteReusableWorkflow(rc *RunContext, cloneURL, ref, targetDirectory, Dir: targetDirectory, Token: token, OfflineMode: rc.Config.ActionOfflineMode, + Depth: rc.Config.ActionCloneDepth, })(ctx) } } diff --git a/act/runner/runner.go b/act/runner/runner.go index ff57c178..2e6eb5d7 100644 --- a/act/runner/runner.go +++ b/act/runner/runner.go @@ -33,6 +33,7 @@ type Config struct { Workdir string // path to working directory ActionCacheDir string // path used for caching action contents ActionOfflineMode bool // when offline, use cached action contents + ActionCloneDepth int // limit history when cloning an action repo; 0 clones every branch in full BindWorkdir bool // bind the workdir to the job container EventName string // name of event to run EventPath string // path to JSON file to use for event.json in containers diff --git a/act/runner/step_action_remote.go b/act/runner/step_action_remote.go index 416ed1f5..75624824 100644 --- a/act/runner/step_action_remote.go +++ b/act/runner/step_action_remote.go @@ -121,6 +121,7 @@ func (sar *stepActionRemote) prepareActionExecutor() common.Executor { Dir: actionDir, Token: token, OfflineMode: sar.RunContext.Config.ActionOfflineMode, + Depth: sar.RunContext.Config.ActionCloneDepth, InsecureSkipTLS: sar.cloneSkipTLS(), // For Gitea }) diff --git a/internal/app/run/runner.go b/internal/app/run/runner.go index 16e5788b..b7b456df 100644 --- a/internal/app/run/runner.go +++ b/internal/app/run/runner.go @@ -396,6 +396,12 @@ func (r *Runner) run(ctx context.Context, task *runnerv1.Task, reporter *report. 
maxLifetime = time.Until(deadline) } + // Shallow-clone the requested ref at depth 1 by default; a depth of 0 means a full clone + actionCloneDepth := 1 + if r.cfg.Runner.ActionShallowClone != nil && !*r.cfg.Runner.ActionShallowClone { + actionCloneDepth = 0 + } + workdirParent := strings.TrimLeft(r.cfg.Container.WorkdirParent, "/") if r.cfg.Container.BindWorkdir { // Append the task ID to isolate concurrent jobs from the same repo. @@ -418,6 +424,7 @@ func (r *Runner) run(ctx context.Context, task *runnerv1.Task, reporter *report. ActionCacheDir: filepath.FromSlash(r.cfg.Host.WorkdirParent), AllocatePTY: r.cfg.Runner.AllocatePTY, ActionOfflineMode: r.cfg.Cache.OfflineMode, + ActionCloneDepth: actionCloneDepth, ReuseContainers: false, ForcePull: r.cfg.Container.ForcePull, diff --git a/internal/pkg/config/config.example.yaml b/internal/pkg/config/config.example.yaml index 31fedef4..2176dff7 100644 --- a/internal/pkg/config/config.example.yaml +++ b/internal/pkg/config/config.example.yaml @@ -69,6 +69,9 @@ runner: # and github_mirror is not empty. In this case, # it replaces https://github.com with the value here, which is useful for some special network environments. github_mirror: '' + # When true (the default), fetch only the requested ref of an action repository (e.g. actions/checkout@v4) at depth 1 instead of cloning every branch's full history. + # Set to false to clone the full history. + action_shallow_clone: true # The labels of a runner are used to determine which jobs the runner can run, and how to run them. # Like: "macos-arm64:host" or "ubuntu-latest:docker://docker.gitea.com/runner-images:ubuntu-latest" # Find more images provided by Gitea at https://gitea.com/gitea/runner-images . 
diff --git a/internal/pkg/config/config.go b/internal/pkg/config/config.go index e9e5da0f..240def9d 100644 --- a/internal/pkg/config/config.go +++ b/internal/pkg/config/config.go @@ -48,6 +48,7 @@ type Runner struct { ReportCloseTimeout time.Duration `yaml:"report_close_timeout"` // ReportCloseTimeout caps each RPC attempt when flushing the final logs and task state at job completion, on a detached context so a server cancel can't block the acknowledgement. Labels []string `yaml:"labels"` // Labels specify the labels of the runner. Labels are declared on each startup GithubMirror string `yaml:"github_mirror"` // GithubMirror defines what mirrors should be used when using github + ActionShallowClone *bool `yaml:"action_shallow_clone"` // ActionShallowClone fetches only the requested ref of an action repository at depth 1 instead of cloning every branch's full history. It is a pointer to distinguish between false and not set; if not set, it defaults to true. AllocatePTY bool `yaml:"allocate_pty"` // AllocatePTY allocates a pseudo-TTY for each step's process. Default is false, matching GitHub's actions/runner. Enable only for jobs that need an interactive terminal; tools like docker build emit redrawing progress frames into the captured log when a TTY is present. Applies to both host and docker backends. PostTaskScript string `yaml:"post_task_script"` // PostTaskScript is the path to an executable script run on the host after each task's cleanup completes. Empty disables the hook. On Windows use .exe/.bat/.cmd; PowerShell (.ps1) is not supported yet as the configured path. PostTaskScriptTimeout time.Duration `yaml:"post_task_script_timeout"` // PostTaskScriptTimeout caps how long the post-task script may run. Default is 5m when post_task_script is set. 
@@ -151,6 +152,10 @@ func LoadDefault(file string) (*Config, error) { if cfg.Runner.Timeout <= 0 { cfg.Runner.Timeout = 3 * time.Hour } + if cfg.Runner.ActionShallowClone == nil { + b := true + cfg.Runner.ActionShallowClone = &b + } if cfg.Cache.Enabled == nil { b := true cfg.Cache.Enabled = &b