// Copyright (C) 2024 Umorpha Systems // SPDX-License-Identifier: AGPL-3.0-or-later package gitcache import ( "errors" "fmt" "io" "os" "os/exec" "path/filepath" "strings" "sync" "syscall" "time" ) const ( tsDir = "x-gitcache-ts" tmpDir = "x-gitcache-tmp" ) type Cache struct { Dir string MinPeriod time.Duration initOnce sync.Once initErr error } // "ll" stands for "low level" ///////////////////////////////////////////////// func (cache *Cache) llInit(stderr io.Writer) error { fmt.Fprintf(stderr, "[gitcache] Initializing cache...\n") cmd := exec.Command("git", "-c", "init.defaultBranch=main", "init", "--bare", cache.Dir) cmd.Stderr = stderr if err := cmd.Run(); err != nil { return err } if err := mkdirAllowExisting(filepath.Join(cache.Dir, tsDir)); err != nil { return err } return nil } func (cache *Cache) llFetch(stderr io.Writer, namespace, url string) error { fmt.Fprintf(stderr, "[gitcache] Fetching %q...\n", url) cmd := exec.Command("git", "fetch", "--no-write-fetch-head", "--no-recurse-submodules", "--no-tags", "--prune", "--force", "--", url, "*:refs/namespaces/"+namespace+"/*") cmd.Dir = cache.Dir cmd.Stderr = stderr return cmd.Run() } //////////////////////////////////////////////////////////////////////////////// func (cache *Cache) init(stderr io.Writer) error { cache.initOnce.Do(func() { cache.initErr = cache.llInit(stderr) }) if err := mkdirAllowExisting(filepath.Join(cache.Dir, tmpDir)); err != nil { return err } return cache.initErr } func (cache *Cache) Fetch(stderr io.Writer, url string) error { namespace, ok := URL2NS(url) if !ok { return fmt.Errorf("invalid URL: %q", url) } if err := cache.init(stderr); err != nil { return err } tsFile := filepath.Join(cache.Dir, tsDir, namespace) if cache.MinPeriod != 0 { ts, err := mtime(tsFile) if err == nil && ts.Add(cache.MinPeriod).After(time.Now()) { return nil } } if err := cache.llFetch(stderr, namespace, url); err != nil { return err } return touch(tsFile) } func (cache *Cache) Clone(stderr io.Writer, url, dir string, flags ...string) error { namespace, ok := URL2NS(url) if !ok { return fmt.Errorf("invalid URL: %q", url) } if err := cache.init(stderr); err != nil { return err } if _, err := mtime(filepath.Join(cache.Dir, tsDir, namespace)); err != nil { return fmt.Errorf("repo does not exist: %q", url) } cacheDir, err := filepath.Abs(cache.Dir) if err != nil { return err } cacheURL := "ext::git --namespace " + namespace + " %s " + cacheDir fmt.Fprintf(stderr, "[gitcache] Cloning %q to %q...\n", url, dir) cmd := exec.Command("git", append(append([]string{ "-c", "protocol.ext.allow=user", "-c", "url." + cacheURL + ".insteadOf=" + url, "clone", }, flags...), "--", url, dir)...) cmd.Stderr = stderr if err := cmd.Run(); err != nil { return err } return nil } func (cache *Cache) WithFastClone(stderr io.Writer, url string, fn func(dir string) error) (err error) { namespace, ok := URL2NS(url) if !ok { return fmt.Errorf("invalid URL: %q", url) } maybeSetErr := func(_err error) { if err == nil && _err != nil { err = _err } } if _, err := mtime(filepath.Join(cache.Dir, tsDir, namespace)); err != nil { return fmt.Errorf("repo does not exist: %q", url) } fmt.Fprintf(stderr, "[gitcache] Creating temporary view of %q...\n", url) // It is important that this be on the same filesystem as the cache so that it can use // hard-links to be extremely light-weight. So, we put it in `cache.git/x-gitcache-tmp/`. // // We also flock() the directory, so that we can GC them later if this process gets // interrupted. var tmpdir string for { tmpdir, err = os.MkdirTemp(filepath.Join(cache.Dir, tmpDir), "*.git") if err != nil { return err } fh, err := os.Open(tmpdir) if err != nil { return err } if err := syscall.Flock(int(fh.Fd()), syscall.LOCK_EX|syscall.LOCK_NB); err != nil { if errors.Is(err, syscall.EWOULDBLOCK) { // Presumably, a concurrent .Maintenance() call locked it before we // could. Try again. if err := fh.Close(); err != nil { return err } continue } return &os.PathError{Op: "flock", Path: tmpdir, Err: err} } defer func() { maybeSetErr(os.RemoveAll(tmpdir)) maybeSetErr(fh.Close()) }() break } cmd := exec.Command("git", "--namespace="+namespace, "clone", "--mirror", cache.Dir, tmpdir) cmd.Stderr = stderr if err := cmd.Run(); err != nil { return err } return fn(tmpdir) } func (cache *Cache) listRefs(stderr io.Writer, prefixes ...string) (map[string]string, error) { fmt.Fprintf(stderr, "[gitcache] Listing refs: %q...\n", prefixes) var stdout strings.Builder cmd := exec.Command("git", append([]string{"for-each-ref", "--format=%(objectname) %(refname)"}, prefixes...)...) cmd.Dir = cache.Dir cmd.Stdout = &stdout cmd.Stderr = stderr if err := cmd.Run(); err != nil { return nil, err } lines := strings.Split(stdout.String(), "\n") ret := make(map[string]string, len(lines)-1) for _, line := range lines { hash, refname, ok := strings.Cut(line, " ") if !ok { continue } ret[refname] = hash } return ret, nil } // returns ("", nil) if the rev doesn't exist, but there wasn't an // error determining that. func (cache *Cache) revParse(stderr io.Writer, rev string) (ret string, err error) { var stdout strings.Builder cmd := exec.Command("git", "rev-parse", "--verify", rev) cmd.Dir = cache.Dir cmd.Stdout = &stdout cmd.Stderr = stderr if err := cmd.Run(); err != nil { var eErr *exec.ExitError if errors.As(err, &eErr) && eErr.ExitCode() == 128 { return "", nil } return "", err } return strings.TrimSpace(stdout.String()), nil } // returns map["{refglob}{suffix}"]map["{refname}{suffix}"]"{hash}" func (cache *Cache) RevParse(stderr io.Writer, url string, revglobs ...string) (map[string]map[string]string, error) { ns, ok := URL2NS(url) if !ok { return nil, fmt.Errorf("invalid URL: %q", url) } nsprefix := "refs/namespaces/" + ns + "/" refs, err := cache.listRefs(stderr, nsprefix) if err != nil { return nil, err } ret := make(map[string]map[string]string, len(revglobs)) for _, revglob := range revglobs { refglob, suffix, ok := ParseRev(revglob, true) if !ok { continue } for nsRefname := range refs { refname := strings.TrimPrefix(nsRefname, nsprefix) if !MatchRef(refglob, refname) { continue } var hash string if suffix == "" { // fast-path hash = refs[nsRefname] } else { var err error hash, err = cache.revParse(stderr, nsRefname) if err != nil { return nil, fmt.Errorf("resolving: %q: %w", revglob, err) } } if hash != "" { if ret[revglob] == nil { ret[revglob] = make(map[string]string, 1) } ret[revglob][refname+suffix] = hash } } } return ret, nil } func (cache *Cache) Maintenance(stderr io.Writer, flags ...string) error { fmt.Fprintf(stderr, "[gitcache] Maintenance: %q...\n", flags) cmd := exec.Command("git", append([]string{"maintenance", "run"}, flags...)...) cmd.Dir = cache.Dir cmd.Stderr = stderr if err := cmd.Run(); err != nil { return err } // GC stale .WithFastClone() dirs. entries, err := os.ReadDir(filepath.Join(cache.Dir, tmpDir)) if err != nil { return err } for _, entry := range entries { dirpath := filepath.Join(cache.Dir, tmpDir, entry.Name()) fh, err := os.Open(dirpath) if err != nil { return err } if err := syscall.Flock(int(fh.Fd()), syscall.LOCK_EX|syscall.LOCK_NB); err != nil { if errors.Is(err, syscall.EWOULDBLOCK) { if err := fh.Close(); err != nil { return err } continue } return &os.PathError{Op: "flock", Path: dirpath, Err: err} } if err := os.RemoveAll(dirpath); err != nil { return err } if err := fh.Close(); err != nil { return err } } return nil }