mirror of
https://github.com/go-task/task.git
synced 2026-06-11 09:51:50 +00:00
feat(website): expand adopters list after full scan, harden discovery tool
Runs the refreshed find-adopters tool against every public Taskfile that GitHub Code Search exposes (1190 unique repos, 13 min) and uses the findings to update the adopters list with four higher-signal entries: Azure/Azure-Sentinel replaces Microsoft's niche Fabric provider, while flet-dev/flet (16k stars, #1 non-Task hit), Anthropic's Rust protobuf project (anthropics/buffa), and charmbracelet/glamour join the list. Gogs drops out since it no longer surfaces in the best-match slice GitHub exposes. Rewrites the discovery strategy: GitHub Code Search caps at 1000 results per query and its size: qualifier turned out to be unreliable (non-monotone total_count, sporadic 404s), so the tool now paginates each of the four Taskfile variants to the cap and supplements that with an org: scan over ~100 curated organizations. That's the practical ceiling without GH Archive or BigQuery, and it captures every big-brand hit we care about. Also drops the code-search rate from 24 to 8.5 req/min to stay under the real 10 req/min authenticated limit.
This commit is contained in:
@@ -11,16 +11,16 @@ export const adopters: Adopter[] = [
|
||||
url: 'https://github.com/docker/mcp-registry',
|
||||
img: 'https://github.com/docker.png'
|
||||
},
|
||||
{
|
||||
name: 'Microsoft',
|
||||
url: 'https://github.com/Azure/Azure-Sentinel',
|
||||
img: 'https://github.com/microsoft.png'
|
||||
},
|
||||
{
|
||||
name: 'HashiCorp',
|
||||
url: 'https://github.com/hashicorp/terraform-aws-terraform-enterprise-hvd',
|
||||
img: 'https://github.com/hashicorp.png'
|
||||
},
|
||||
{
|
||||
name: 'Microsoft',
|
||||
url: 'https://github.com/microsoft/terraform-provider-fabric',
|
||||
img: 'https://github.com/microsoft.png'
|
||||
},
|
||||
{
|
||||
name: 'Vercel',
|
||||
url: 'https://github.com/vercel/terraform-provider-vercel',
|
||||
@@ -36,32 +36,42 @@ export const adopters: Adopter[] = [
|
||||
url: 'https://github.com/aws-samples/appmod-blueprints',
|
||||
img: 'https://github.com/aws-samples.png'
|
||||
},
|
||||
{
|
||||
name: 'Anthropic',
|
||||
url: 'https://github.com/anthropics/buffa',
|
||||
img: 'https://github.com/anthropics.png'
|
||||
},
|
||||
// Notable open source projects
|
||||
{
|
||||
name: 'Arduino CLI',
|
||||
url: 'https://github.com/arduino/arduino-cli',
|
||||
img: 'https://github.com/arduino.png'
|
||||
name: 'Flet',
|
||||
url: 'https://github.com/flet-dev/flet',
|
||||
img: 'https://github.com/flet-dev.png'
|
||||
},
|
||||
{
|
||||
name: 'GoReleaser',
|
||||
url: 'https://github.com/goreleaser/goreleaser',
|
||||
img: 'https://github.com/goreleaser.png'
|
||||
},
|
||||
{
|
||||
name: 'Arduino CLI',
|
||||
url: 'https://github.com/arduino/arduino-cli',
|
||||
img: 'https://github.com/arduino.png'
|
||||
},
|
||||
{
|
||||
name: 'FerretDB',
|
||||
url: 'https://github.com/FerretDB/FerretDB',
|
||||
img: 'https://github.com/FerretDB.png'
|
||||
},
|
||||
{
|
||||
name: 'Gogs',
|
||||
url: 'https://github.com/gogs/gogs',
|
||||
img: 'https://github.com/gogs.png'
|
||||
},
|
||||
{
|
||||
name: 'Tyk',
|
||||
url: 'https://github.com/TykTechnologies/tyk',
|
||||
img: 'https://github.com/TykTechnologies.png'
|
||||
},
|
||||
{
|
||||
name: 'Charmbracelet',
|
||||
url: 'https://github.com/charmbracelet/glamour',
|
||||
img: 'https://github.com/charmbracelet.png'
|
||||
},
|
||||
{
|
||||
name: 'Outline',
|
||||
url: 'https://github.com/OutlineFoundation/outline-server',
|
||||
|
||||
@@ -6,14 +6,33 @@ candidates for the [taskfile.dev](https://taskfile.dev) "Used by" section.
|
||||
|
||||
## How it works
|
||||
|
||||
GitHub Code Search caps results at 1000 per query. `find-adopters` partitions
|
||||
queries by **star bucket** (and, for the 0-star bucket, by pushed-year) so
|
||||
every sub-query stays under the cap. Each unique repo is then enriched via a
|
||||
single batched GraphQL call (stars, description, owner type, language, topics)
|
||||
and sorted by popularity.
|
||||
GitHub Code Search caps at 1000 results per query and only accepts a narrow
|
||||
set of qualifiers alongside `filename:` — notably `stars:`, `language:`, and
|
||||
`pushed:` don't combine, and `size:` does but its `total_count` isn't monotone
|
||||
as ranges shrink, which makes partitioning unreliable. So the tool takes a
|
||||
pragmatic two-pronged approach:
|
||||
|
||||
The full scan typically takes 15-30 minutes, mostly spent respecting the
|
||||
Code Search rate limit (30 req/min).
|
||||
1. **Global best-match pagination** — paginate `filename:Taskfile.yml`,
|
||||
`Taskfile.yaml`, `Taskfile.dist.yml`, and `Taskfile.dist.yaml` directly up
|
||||
to the 1000-result cap. Captures the top ~900 best-ranked hits per variant.
|
||||
2. **Per-org scan** — iterate a built-in list of ~100 well-known organizations
|
||||
(hyperscalers, OSS vendors, DevOps platforms, etc.) with
|
||||
`filename:Taskfile.yml org:<name>`. Captures every Taskfile inside those
|
||||
orgs even when their repos don't rank in the global top.
|
||||
|
||||
The union is deduplicated and enriched via batched GraphQL calls (stars,
|
||||
description, owner type, language, topics), then sorted by stars.
|
||||
|
||||
A full scan typically takes 15-25 minutes — about 120 Code Search calls, paced
|
||||
at roughly 8.5 req/min to stay under the 10 req/min authenticated rate limit, plus a handful of GraphQL batches.
|
||||
|
||||
### Coverage caveat
|
||||
|
||||
GitHub's hard 1000-result cap on the Code Search API means this tool cannot
|
||||
enumerate every Taskfile on GitHub — only the best-ranked slice plus the
|
||||
curated orgs. For truly exhaustive coverage, consider
|
||||
[GH Archive](https://www.gharchive.org/) or the BigQuery public GitHub
|
||||
dataset, which are out of scope here.
|
||||
|
||||
## Usage
|
||||
|
||||
|
||||
@@ -83,7 +83,7 @@ func newClient(token string, verbose bool) *client {
|
||||
http: &http.Client{Timeout: 60 * time.Second},
|
||||
token: token,
|
||||
verbose: verbose,
|
||||
searchGap: 2500 * time.Millisecond, // ~24 req/min, under the 30/min cap
|
||||
searchGap: 7 * time.Second, // ~8.5 req/min, under the 10/min cap
|
||||
}
|
||||
}
|
||||
|
||||
@@ -187,16 +187,30 @@ func urlEscape(s string) string {
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// paginateQuery pages through up to 1000 results for a single query. It
|
||||
// returns early if the reported total_count > 1000 (so the caller can narrow).
|
||||
func (c *client) paginateQuery(q string) (repos []string, total int, err error) {
|
||||
for page := 1; page <= 10; page++ {
|
||||
// paginateQuery fetches up to pageLimit pages of 100 results each (GitHub caps
|
||||
// a query at 1000 results, so 10 pages is the useful maximum). When total_count
|
||||
// exceeds that reach it still paginates — callers that would rather subdivide
|
||||
// can pass pageLimit=1 to read total from the first page alone.
|
||||
func (c *client) paginateQuery(q string, pageLimit int) (repos []string, total int, err error) {
|
||||
first, err := c.searchCode(q, 1)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
total = first.TotalCount
|
||||
if total == 0 {
|
||||
return nil, 0, nil
|
||||
}
|
||||
for _, it := range first.Items {
|
||||
repos = append(repos, it.Repository.FullName)
|
||||
}
|
||||
pages := (total + 99) / 100
|
||||
if pages > pageLimit {
|
||||
pages = pageLimit
|
||||
}
|
||||
for page := 2; page <= pages; page++ {
|
||||
sr, err := c.searchCode(q, page)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
if page == 1 {
|
||||
total = sr.TotalCount
|
||||
return repos, total, err
|
||||
}
|
||||
for _, it := range sr.Items {
|
||||
repos = append(repos, it.Repository.FullName)
|
||||
@@ -208,63 +222,99 @@ func (c *client) paginateQuery(q string) (repos []string, total int, err error)
|
||||
return repos, total, nil
|
||||
}
|
||||
|
||||
// discover partitions the search space by star bucket (and pushed-year for the
|
||||
// stars:0 bucket) so every sub-query stays below the 1000-result cap.
|
||||
// GitHub Code Search caps at 1000 results per query and is unreliable with
|
||||
// the `size:` qualifier (total_count is non-monotone as ranges shrink), so
|
||||
// partitioning tricks don't work cleanly. We instead combine two strategies:
|
||||
//
|
||||
// 1. Paginate each Taskfile variant directly — gets ~900 top-ranked hits per
|
||||
// variant (the "best match" slice GitHub surfaces).
|
||||
// 2. Iterate a curated list of well-known organizations with an explicit
|
||||
// `org:` qualifier — gets full coverage inside big brands even when their
|
||||
// repos don't rank in the global top 900.
|
||||
//
|
||||
// The union is deduplicated and enriched via GraphQL.
|
||||
|
||||
// knownOrgs is a snapshot of organizations worth scanning explicitly. Adding an
|
||||
// org here captures every hit for `filename:Taskfile.yml org:<name>` inside it,
|
||||
// regardless of its global rank. Loosely ordered from most likely to least.
|
||||
var knownOrgs = []string{
|
||||
// Hyperscalers / clouds
|
||||
"docker", "microsoft", "google", "GoogleCloudPlatform", "aws", "awslabs",
|
||||
"aws-samples", "amazon-science", "Azure", "Azure-Samples",
|
||||
// Infra / DevOps vendors
|
||||
"hashicorp", "hashicorp-forge", "vercel", "cloudflare", "digitalocean",
|
||||
"heroku", "JetBrains", "pulumi", "buildkite", "circleci", "dagger",
|
||||
"temporalio", "encoredev", "argoproj", "fluxcd", "flux-framework",
|
||||
// Dev tools / platforms
|
||||
"netflix", "shopify", "airbnb", "uber", "lyft", "stripe", "github",
|
||||
"gitlabhq", "atlassian", "RedHat", "RedHatOfficial", "openshift",
|
||||
// Communication / consumer
|
||||
"spotify", "slackapi", "discord", "figma", "linear", "twilio", "segmentio",
|
||||
// Data / ML
|
||||
"prisma", "supabase", "railwayapp", "superfly", "fly-apps", "planetscale",
|
||||
"tailscale", "coder", "anthropics", "openai", "huggingface",
|
||||
"pytorch", "tensorflow",
|
||||
// Observability / CNCF
|
||||
"grafana", "prometheus", "envoyproxy", "getsentry", "sentry", "cncf",
|
||||
"helm", "istio", "linkerd", "traefik", "caddyserver",
|
||||
// Frontend frameworks
|
||||
"vitejs", "biomejs", "sveltejs", "vuejs", "reactjs", "astro", "nuxt",
|
||||
// Databases
|
||||
"mongodb-labs", "redis", "neo4j", "elastic", "influxdata", "timescale",
|
||||
"clickhouse", "FerretDB",
|
||||
// Go ecosystem / popular OSS
|
||||
"goreleaser", "spf13", "urfave", "charmbracelet", "nodejs", "golang",
|
||||
"rust-lang", "python", "apache", "etcd-io", "grpc", "arduino",
|
||||
// Data eng
|
||||
"dbt-labs", "astronomer", "prefecthq",
|
||||
}
|
||||
|
||||
// discover walks every Taskfile variant with global pagination plus per-org
|
||||
// scans, and returns unique owner/name pairs.
|
||||
func (c *client) discover() (map[string]struct{}, error) {
|
||||
uniq := make(map[string]struct{})
|
||||
|
||||
// Star buckets cover the whole range.
|
||||
buckets := []string{
|
||||
"stars:>=1000",
|
||||
"stars:100..999",
|
||||
"stars:10..99",
|
||||
"stars:1..9",
|
||||
"stars:0",
|
||||
variants := []string{
|
||||
"Taskfile.yml",
|
||||
"Taskfile.yaml",
|
||||
"Taskfile.dist.yml",
|
||||
"Taskfile.dist.yaml",
|
||||
}
|
||||
|
||||
extensions := []string{"Taskfile.yml", "Taskfile.yaml"}
|
||||
|
||||
for _, ext := range extensions {
|
||||
for _, bucket := range buckets {
|
||||
q := fmt.Sprintf("filename:%s %s", ext, bucket)
|
||||
c.logf("query: %s", q)
|
||||
repos, total, err := c.paginateQuery(q)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "warn: %v\n", err)
|
||||
continue
|
||||
}
|
||||
c.logf(" total=%d collected=%d", total, len(repos))
|
||||
for _, r := range repos {
|
||||
uniq[r] = struct{}{}
|
||||
}
|
||||
if total > 1000 {
|
||||
// Partition the bucket further by pushed-year.
|
||||
c.logf(" bucket exceeds 1000, subdividing by pushed-year")
|
||||
if err := c.discoverByPushedYear(ext, bucket, uniq); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "warn: pushed partition: %v\n", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return uniq, nil
|
||||
}
|
||||
|
||||
func (c *client) discoverByPushedYear(ext, bucket string, uniq map[string]struct{}) error {
|
||||
now := time.Now().Year()
|
||||
// GitHub Search was introduced in 2008 — years before that won't have hits.
|
||||
for year := 2015; year <= now; year++ {
|
||||
q := fmt.Sprintf("filename:%s %s pushed:%d-01-01..%d-12-31", ext, bucket, year, year)
|
||||
c.logf(" sub-query: %s", q)
|
||||
repos, total, err := c.paginateQuery(q)
|
||||
c.logf("phase: global pagination (best-match top ~900 per variant)")
|
||||
for _, v := range variants {
|
||||
q := fmt.Sprintf("filename:%s", v)
|
||||
c.logf(" query: %s", q)
|
||||
repos, total, err := c.paginateQuery(q, 10)
|
||||
if err != nil {
|
||||
return err
|
||||
fmt.Fprintf(os.Stderr, "warn: variant %s: %v\n", v, err)
|
||||
continue
|
||||
}
|
||||
c.logf(" total=%d collected=%d", total, len(repos))
|
||||
for _, r := range repos {
|
||||
uniq[r] = struct{}{}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
||||
c.logf("phase: per-org scan (%d orgs)", len(knownOrgs))
|
||||
for _, org := range knownOrgs {
|
||||
q := fmt.Sprintf("filename:Taskfile.yml org:%s", org)
|
||||
repos, total, err := c.paginateQuery(q, 10)
|
||||
if err != nil {
|
||||
// Orgs that don't exist return 404 — log once and move on.
|
||||
c.logf(" skip %s: %v", org, err)
|
||||
continue
|
||||
}
|
||||
if total == 0 {
|
||||
continue
|
||||
}
|
||||
c.logf(" org:%s total=%d collected=%d", org, total, len(repos))
|
||||
for _, r := range repos {
|
||||
uniq[r] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
return uniq, nil
|
||||
}
|
||||
|
||||
// ----- Enrichment (GraphQL) -----
|
||||
|
||||
Reference in New Issue
Block a user