refactor: migrate from go-git to go-getter (#2512)

This commit is contained in:
Valentin Maerten
2025-12-18 12:21:30 +01:00
committed by GitHub
parent 2ebbb99f58
commit cb183349b7
6 changed files with 485 additions and 113 deletions

View File

@@ -3,16 +3,14 @@ package taskfile
import (
"context"
"fmt"
"io"
"net/url"
"os"
"path/filepath"
"strings"
"sync"
giturls "github.com/chainguard-dev/git-urls"
"github.com/go-git/go-billy/v5/memfs"
"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/storage/memory"
"github.com/hashicorp/go-getter"
"github.com/go-task/task/v3/errors"
"github.com/go-task/task/v3/internal/execext"
@@ -28,6 +26,36 @@ type GitNode struct {
path string
}
// gitRepoCache hands out one mutex per repository cache key so that callers
// working on the same key can serialize their work.
//
// The zero value is ready to use: the locks map is allocated on first use.
type gitRepoCache struct {
	mu    sync.Mutex             // Protects the locks map
	locks map[string]*sync.Mutex // One mutex per repo cache key
}

// getLockForRepo returns the mutex registered for cacheKey, creating and
// registering a new one the first time the key is seen. Calls with the same
// key return the same mutex for as long as its entry stays in the map.
func (c *gitRepoCache) getLockForRepo(cacheKey string) *sync.Mutex {
	c.mu.Lock()
	defer c.mu.Unlock()

	// Allocate lazily so a zero-value gitRepoCache does not panic on the
	// map write below.
	if c.locks == nil {
		c.locks = make(map[string]*sync.Mutex)
	}

	lock, ok := c.locks[cacheKey]
	if !ok {
		lock = &sync.Mutex{}
		c.locks[cacheKey] = lock
	}
	return lock
}
// globalGitRepoCache is the package-wide table of per-repository locks.
// It starts out with an empty, already-allocated locks map.
var globalGitRepoCache = &gitRepoCache{locks: map[string]*sync.Mutex{}}
// CleanGitCache resets the in-memory per-repository lock table and deletes the
// on-disk clone cache directory ("task-git-repos" under os.TempDir()).
// It returns the error from removing that directory, if any.
func CleanGitCache() error {
	cache := globalGitRepoCache

	// Swap in a fresh map so lock entries do not accumulate over time.
	cache.mu.Lock()
	cache.locks = map[string]*sync.Mutex{}
	cache.mu.Unlock()

	return os.RemoveAll(filepath.Join(os.TempDir(), "task-git-repos"))
}
func NewGitNode(
entrypoint string,
dir string,
@@ -72,24 +100,78 @@ func (node *GitNode) Read() ([]byte, error) {
return node.ReadContext(context.Background())
}
func (node *GitNode) ReadContext(_ context.Context) ([]byte, error) {
fs := memfs.New()
storer := memory.NewStorage()
_, err := git.Clone(storer, fs, &git.CloneOptions{
URL: node.url.String(),
ReferenceName: plumbing.ReferenceName(node.ref),
SingleBranch: true,
Depth: 1,
})
// buildURL returns the go-getter source string for this node's repository:
// the repository URL with a forced "git::" getter prefix, a "ref" query
// parameter (defaulting to "HEAD" when the node has no ref) and "depth=1".
func (node *GitNode) buildURL() string {
	ref := node.ref
	if ref == "" {
		ref = "HEAD"
	}

	// Always use the git:: prefix for git URLs (following Terraform's pattern)
	// so that go-getter is forced to treat the source as a git repository.
	//
	// The ref is query-escaped because it ends up inside a query string:
	// characters such as '+', '&', '#' or '%' (e.g. a tag "v1.0.0+build")
	// would otherwise be altered or truncate the value when it is parsed.
	return fmt.Sprintf("git::%s?ref=%s&depth=1", node.url.String(), url.QueryEscape(ref))
}
// getOrCloneRepo returns the directory holding the cached checkout for this
// node's repository+ref, cloning it through go-getter when no checkout exists.
//
// Callers are serialized per cache key via globalGitRepoCache, so goroutines
// asking for the same repo+ref wait for each other and the later ones find
// the checkout already present instead of cloning again.
//
// Checkouts live under {os.TempDir()}/task-git-repos/{cache_key}/. A checkout
// counts as present when its ".git" entry can be stat'ed. If the clone fails,
// the target directory is removed (best effort) and an error is returned.
func (node *GitNode) getOrCloneRepo(ctx context.Context) (string, error) {
	key := node.repoCacheKey()

	lock := globalGitRepoCache.getLockForRepo(key)
	lock.Lock()
	defer lock.Unlock()

	// The wait for the lock may have been long; bail out if the caller gave up.
	if ctxErr := ctx.Err(); ctxErr != nil {
		return "", fmt.Errorf("context cancelled while waiting for repository lock: %w", ctxErr)
	}

	repoDir := filepath.Join(os.TempDir(), "task-git-repos", key)

	// Still holding the lock: reuse an existing checkout if there is one.
	_, statErr := os.Stat(filepath.Join(repoDir, ".git"))
	if statErr == nil {
		return repoDir, nil
	}

	cloner := &getter.Client{
		Ctx:  ctx,
		Src:  node.buildURL(),
		Dst:  repoDir,
		Mode: getter.ClientModeDir,
	}
	cloneErr := cloner.Get()
	if cloneErr == nil {
		return repoDir, nil
	}

	// Best-effort cleanup of whatever was left behind; the clone error is
	// the one worth reporting.
	_ = os.RemoveAll(repoDir)
	return "", fmt.Errorf("failed to clone repository: %w", cloneErr)
}
func (node *GitNode) ReadContext(ctx context.Context) ([]byte, error) {
// Get or clone the repository into cache
repoDir, err := node.getOrCloneRepo(ctx)
if err != nil {
return nil, err
}
file, err := fs.Open(node.path)
if err != nil {
return nil, err
// Build path to Taskfile in the cached repo
taskfilePath := node.path
if taskfilePath == "" {
taskfilePath = "Taskfile.yml"
}
// Read the entire response body
b, err := io.ReadAll(file)
filePath := filepath.Join(repoDir, taskfilePath)
// Read file from cached repo
b, err := os.ReadFile(filePath)
if err != nil {
return nil, err
}
@@ -138,6 +220,22 @@ func (node *GitNode) CacheKey() string {
return fmt.Sprintf("git.%s.%s.%s", node.url.Host, prefix, checksum)
}
// repoCacheKey builds the cache key for the repository+ref pair of this node.
// The file path inside the repository is not part of the key (CacheKey()
// covers that), so nodes that differ only in file path share one key.
//
// The key is the URL host, the URL path without leading/trailing slashes and
// the ref ("HEAD" when no ref is set), joined as a filesystem path, e.g.
// github.com/user/repo.git/main
func (node *GitNode) repoCacheKey() string {
	segments := []string{
		node.url.Host,
		strings.Trim(node.url.Path, "/"),
		"HEAD",
	}
	if node.ref != "" {
		segments[2] = node.ref
	}
	return filepath.Join(segments...)
}
func splitURLOnDoubleSlash(u *url.URL) (string, string) {
x := strings.Split(u.Path, "//")
switch len(x) {

View File

@@ -102,3 +102,146 @@ func TestGitNode_CacheKey(t *testing.T) {
assert.Equal(t, tt.expectedKey, key)
}
}
// TestGitNode_buildURL checks the go-getter source string produced by
// buildURL for HTTPS and SSH entrypoints, with and without an explicit ref.
func TestGitNode_buildURL(t *testing.T) {
	t.Parallel()

	type testCase struct {
		name        string
		entrypoint  string
		expectedURL string
	}

	cases := []testCase{
		{
			name:        "HTTPS with ref",
			entrypoint:  "https://github.com/foo/bar.git//Taskfile.yml?ref=main",
			expectedURL: "git::https://github.com/foo/bar.git?ref=main&depth=1",
		},
		{
			name:        "SSH with ref",
			entrypoint:  "git@github.com:foo/bar.git//Taskfile.yml?ref=main",
			expectedURL: "git::ssh://git@github.com/foo/bar.git?ref=main&depth=1",
		},
		{
			name:        "HTTPS with tag ref",
			entrypoint:  "https://github.com/foo/bar.git//Taskfile.yml?ref=v1.0.0",
			expectedURL: "git::https://github.com/foo/bar.git?ref=v1.0.0&depth=1",
		},
		{
			name:        "HTTPS without ref (uses remote HEAD)",
			entrypoint:  "https://github.com/foo/bar.git//Taskfile.yml",
			expectedURL: "git::https://github.com/foo/bar.git?ref=HEAD&depth=1",
		},
		{
			name:        "SSH with directory path",
			entrypoint:  "git@github.com:foo/bar.git//directory/Taskfile.yml?ref=dev",
			expectedURL: "git::ssh://git@github.com/foo/bar.git?ref=dev&depth=1",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			gitNode, err := NewGitNode(tc.entrypoint, "", false)
			require.NoError(t, err)

			assert.Equal(t, tc.expectedURL, gitNode.buildURL())
		})
	}
}
// TestRepoCacheKey_SameRepoSameRef verifies that two nodes pointing at
// different files of the same repo+ref produce the SAME repo cache key.
func TestRepoCacheKey_SameRepoSameRef(t *testing.T) {
	t.Parallel()

	first, err := NewGitNode("https://github.com/foo/bar.git//file1.yml?ref=main", "", false)
	require.NoError(t, err)

	second, err := NewGitNode("https://github.com/foo/bar.git//dir/file2.yml?ref=main", "", false)
	require.NoError(t, err)

	firstKey, secondKey := first.repoCacheKey(), second.repoCacheKey()
	assert.Equal(t, firstKey, secondKey, "Same repo+ref should generate same cache key regardless of file path")
}
// TestRepoCacheKey_SameRepoDifferentRef verifies that the same repository at
// two different refs produces DIFFERENT repo cache keys.
func TestRepoCacheKey_SameRepoDifferentRef(t *testing.T) {
	t.Parallel()

	onMain, err := NewGitNode("https://github.com/foo/bar.git//file.yml?ref=main", "", false)
	require.NoError(t, err)

	onDev, err := NewGitNode("https://github.com/foo/bar.git//file.yml?ref=dev", "", false)
	require.NoError(t, err)

	mainKey, devKey := onMain.repoCacheKey(), onDev.repoCacheKey()
	assert.NotEqual(t, mainKey, devKey, "Different refs should generate different cache keys")
}
// TestRepoCacheKey_DifferentRepos verifies that two distinct repositories at
// the same ref produce DIFFERENT repo cache keys.
func TestRepoCacheKey_DifferentRepos(t *testing.T) {
	t.Parallel()

	barNode, err := NewGitNode("https://github.com/foo/bar.git//file.yml?ref=main", "", false)
	require.NoError(t, err)

	otherNode, err := NewGitNode("https://github.com/foo/other.git//file.yml?ref=main", "", false)
	require.NoError(t, err)

	barKey, otherKey := barNode.repoCacheKey(), otherNode.repoCacheKey()
	assert.NotEqual(t, barKey, otherKey, "Different repos should generate different cache keys")
}
// TestRepoCacheKey_NoRefVsHead verifies that omitting the ref (which defaults
// to HEAD) and passing ref=HEAD explicitly produce the SAME repo cache key.
func TestRepoCacheKey_NoRefVsHead(t *testing.T) {
	t.Parallel()

	implicitHead, err := NewGitNode("https://github.com/foo/bar.git//file.yml", "", false)
	require.NoError(t, err)

	explicitHead, err := NewGitNode("https://github.com/foo/bar.git//file.yml?ref=HEAD", "", false)
	require.NoError(t, err)

	implicitKey, explicitKey := implicitHead.repoCacheKey(), explicitHead.repoCacheKey()
	assert.Equal(t, implicitKey, explicitKey, "No ref and explicit HEAD should generate same cache key")
}
// TestRepoCacheKey_SSHvsHTTPS verifies that an SSH and an HTTPS entrypoint
// for the same repo+ref produce the SAME repo cache key, so that both
// transports share a single cached checkout.
func TestRepoCacheKey_SSHvsHTTPS(t *testing.T) {
	t.Parallel()

	viaSSH, err := NewGitNode("git@github.com:foo/bar.git//file.yml?ref=main", "", false)
	require.NoError(t, err)

	viaHTTPS, err := NewGitNode("https://github.com/foo/bar.git//file.yml?ref=main", "", false)
	require.NoError(t, err)

	sshKey, httpsKey := viaSSH.repoCacheKey(), viaHTTPS.repoCacheKey()
	assert.Equal(t, sshKey, httpsKey, "SSH and HTTPS for same repo should share cache")
}
// TestRepoCacheKey_Consistency verifies that repeated repoCacheKey calls on
// the same node keep returning the same key.
func TestRepoCacheKey_Consistency(t *testing.T) {
	t.Parallel()

	gitNode, err := NewGitNode("https://github.com/foo/bar.git//file.yml?ref=main", "", false)
	require.NoError(t, err)

	// Three consecutive calls, compared pairwise.
	first := gitNode.repoCacheKey()
	second := gitNode.repoCacheKey()
	third := gitNode.repoCacheKey()

	assert.Equal(t, first, second)
	assert.Equal(t, second, third)
}

View File

@@ -204,9 +204,15 @@ func (o *promptFuncOption) ApplyToReader(r *Reader) {
// building an [ast.TaskfileGraph] as it goes. If any errors occur, they will be
// returned immediately.
func (r *Reader) Read(ctx context.Context, node Node) (*ast.TaskfileGraph, error) {
	// Once every taskfile has been read (or reading has failed), drop the
	// git cache. The cleanup error is deliberately discarded.
	defer func() { _ = CleanGitCache() }()

	err := r.include(ctx, node)
	if err != nil {
		return nil, err
	}

	return r.graph, nil
}