// Copyright (C) 2024 Umorpha Systems // SPDX-License-Identifier: AGPL-3.0-or-later package gitcache import ( "errors" "fmt" "io" "os" "os/exec" "path/filepath" "sync" "syscall" "time" ) const ( tsDir = "x-gitcache-ts" tmpDir = "x-gitcache-tmp" ) type Cache struct { Dir string MinPeriod time.Duration initOnce sync.Once initErr error } // "ll" stands for "low level" ///////////////////////////////////////////////// func (cache *Cache) llInit(stderr io.Writer) error { fmt.Fprintf(stderr, "[gitcache] Initializing cache...\n") cmd := exec.Command("git", "-c", "init.defaultBranch=main", "init", "--bare", cache.Dir) cmd.Stderr = stderr if err := cmd.Run(); err != nil { return err } if err := mkdirAllowExisting(filepath.Join(cache.Dir, tsDir)); err != nil { return err } return nil } func (cache *Cache) llFetch(stderr io.Writer, namespace, url string) error { fmt.Fprintf(stderr, "[gitcache] Fetching %q...\n", url) cmd := exec.Command("git", "fetch", "--no-write-fetch-head", "--no-recurse-submodules", "--no-tags", "--prune", "--force", "--", url, "*:refs/namespaces/"+namespace+"/*") cmd.Dir = cache.Dir cmd.Stderr = stderr return cmd.Run() } //////////////////////////////////////////////////////////////////////////////// func (cache *Cache) init(stderr io.Writer) error { cache.initOnce.Do(func() { cache.initErr = cache.llInit(stderr) }) if err := mkdirAllowExisting(filepath.Join(cache.Dir, tmpDir)); err != nil { return err } return cache.initErr } func (cache *Cache) Fetch(stderr io.Writer, url string) error { namespace, ok := URL2NS(url) if !ok { return fmt.Errorf("invalid URL: %q", url) } if err := cache.init(stderr); err != nil { return err } tsFile := filepath.Join(cache.Dir, tsDir, namespace) if cache.MinPeriod != 0 { ts, err := mtime(tsFile) if err == nil && ts.Add(cache.MinPeriod).After(time.Now()) { return nil } } if err := cache.llFetch(stderr, namespace, url); err != nil { return err } return touch(tsFile) } func (cache *Cache) Clone(stderr io.Writer, url, dir string, flags ...string) error { namespace, ok := URL2NS(url) if !ok { return fmt.Errorf("invalid URL: %q", url) } if err := cache.init(stderr); err != nil { return err } if _, err := mtime(filepath.Join(cache.Dir, tsDir, namespace)); err != nil { return fmt.Errorf("repo does not exist: %q", url) } cacheDir, err := filepath.Abs(cache.Dir) if err != nil { return err } cacheURL := "ext::git --namespace " + namespace + " %s " + cacheDir fmt.Fprintf(stderr, "[gitcache] Cloning %q to %q...\n", url, dir) cmd := exec.Command("git", append(append([]string{ "-c", "protocol.ext.allow=user", "-c", "url." + cacheURL + ".insteadOf=" + url, "clone", }, flags...), "--", url, dir)...) cmd.Stderr = stderr if err := cmd.Run(); err != nil { return err } return nil } func (cache *Cache) WithFastClone(stderr io.Writer, url string, fn func(dir string) error) (err error) { namespace, ok := URL2NS(url) if !ok { return fmt.Errorf("invalid URL: %q", url) } maybeSetErr := func(_err error) { if err == nil && _err != nil { err = _err } } if _, err := mtime(filepath.Join(cache.Dir, tsDir, namespace)); err != nil { return fmt.Errorf("repo does not exist: %q", url) } fmt.Fprintf(stderr, "[gitcache] Creating temporary view of %q...\n", url) // It is important that this be on the same filesystem as the cache so that it can use // hard-links to be extremely light-weight. So, we put it in `cache.git/x-gitcache-tmp/`. // // We also flock() the directory, so that we can GC them later if this process gets // interrupted. var tmpdir string for { tmpdir, err = os.MkdirTemp(filepath.Join(cache.Dir, tmpDir), "*.git") if err != nil { return err } fh, err := os.Open(tmpdir) if err != nil { return err } if err := syscall.Flock(int(fh.Fd()), syscall.LOCK_EX|syscall.LOCK_NB); err != nil { if errors.Is(err, syscall.EWOULDBLOCK) { // Presumably, a concurrent .Maintenance() call locked it before we // could. Try again. if err := fh.Close(); err != nil { return err } continue } return &os.PathError{Op: "flock", Path: tmpdir, Err: err} } defer func() { maybeSetErr(os.RemoveAll(tmpdir)) maybeSetErr(fh.Close()) }() break } cmd := exec.Command("git", "--namespace="+namespace, "clone", "--mirror", cache.Dir, tmpdir) cmd.Stderr = stderr if err := cmd.Run(); err != nil { return err } return fn(tmpdir) } func (cache *Cache) RevParse(stderr io.Writer, url string, revs ...string) (ret []string, err error) { err = cache.WithFastClone(stderr, url, func(tmpdir string) error { ret = make([]string, len(revs)) for i, rev := range revs { cmd := exec.Command("git", "rev-parse", "--verify", rev) cmd.Dir = tmpdir out, err := cmd.Output() if err != nil { var eErr *exec.ExitError if errors.As(err, &eErr) { if eErr.ExitCode() == 128 { continue } if len(eErr.Stderr) > 0 { err = fmt.Errorf("%w: %s", err, eErr.Stderr) } } return fmt.Errorf("resolving: %q: %w", rev, err) } ret[i] = string(out[:len(out)-1]) } return nil }) return ret, err } func (cache *Cache) Maintenance(stderr io.Writer, flags ...string) error { fmt.Fprintf(stderr, "[gitcache] Maintenance: %q...\n", flags) cmd := exec.Command("git", append([]string{"maintenance", "run"}, flags...)...) cmd.Dir = cache.Dir cmd.Stderr = stderr if err := cmd.Run(); err != nil { return err } // GC stale .WithFastClone() dirs. entries, err := os.ReadDir(filepath.Join(cache.Dir, tmpDir)) if err != nil { return err } for _, entry := range entries { dirpath := filepath.Join(cache.Dir, tmpDir, entry.Name()) fh, err := os.Open(dirpath) if err != nil { return err } if err := syscall.Flock(int(fh.Fd()), syscall.LOCK_EX|syscall.LOCK_NB); err != nil { if errors.Is(err, syscall.EWOULDBLOCK) { if err := fh.Close(); err != nil { return err } continue } return &os.PathError{Op: "flock", Path: dirpath, Err: err} } if err := os.RemoveAll(dirpath); err != nil { return err } if err := fh.Close(); err != nil { return err } } return nil }