Cache repository default branch commit status to reduce query on commit status table (#29444)

After repository commit status has been introduced on dashaboard, the
most top SQL comes from `GetLatestCommitStatusForPairs`.

This PR adds a cache for the repository's default branch's latest
combined commit status. When a new commit status updated, the cache will
be marked as invalid.

<img width="998" alt="image"
src="https://github.com/go-gitea/gitea/assets/81045/76759de7-3a83-4d54-8571-278f5422aed3">
This commit is contained in:
Lunny Xiao 2024-03-06 20:17:19 +08:00 committed by GitHub
parent 90a3f2d4b7
commit e308d25f1b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 145 additions and 70 deletions

@ -14,7 +14,7 @@ import (
"code.gitea.io/gitea/routers/api/v1/utils"
"code.gitea.io/gitea/services/context"
"code.gitea.io/gitea/services/convert"
files_service "code.gitea.io/gitea/services/repository/files"
commitstatus_service "code.gitea.io/gitea/services/repository/commitstatus"
)
// NewCommitStatus creates a new CommitStatus
@ -64,7 +64,7 @@ func NewCommitStatus(ctx *context.APIContext) {
Description: form.Description,
Context: form.Context,
}
if err := files_service.CreateCommitStatus(ctx, ctx.Repo.Repository, ctx.Doer, sha, status); err != nil {
if err := commitstatus_service.CreateCommitStatus(ctx, ctx.Repo.Repository, ctx.Doer, sha, status); err != nil {
ctx.Error(http.StatusInternalServerError, "CreateCommitStatus", err)
return
}

@ -35,6 +35,7 @@ import (
"code.gitea.io/gitea/services/forms"
repo_service "code.gitea.io/gitea/services/repository"
archiver_service "code.gitea.io/gitea/services/repository/archiver"
commitstatus_service "code.gitea.io/gitea/services/repository/commitstatus"
)
const (
@ -634,30 +635,14 @@ func SearchRepo(ctx *context.Context) {
return
}
// collect the latest commit of each repo
// at most there are dozens of repos (limited by MaxResponseItems), so it's not a big problem at the moment
repoBranchNames := make(map[int64]string, len(repos))
for _, repo := range repos {
repoBranchNames[repo.ID] = repo.DefaultBranch
}
repoIDsToLatestCommitSHAs, err := git_model.FindBranchesByRepoAndBranchName(ctx, repoBranchNames)
latestCommitStatuses, err := commitstatus_service.FindReposLastestCommitStatuses(ctx, repos)
if err != nil {
log.Error("FindBranchesByRepoAndBranchName: %v", err)
return
}
// call the database O(1) times to get the commit statuses for all repos
repoToItsLatestCommitStatuses, err := git_model.GetLatestCommitStatusForPairs(ctx, repoIDsToLatestCommitSHAs, db.ListOptionsAll)
if err != nil {
log.Error("GetLatestCommitStatusForPairs: %v", err)
log.Error("FindReposLastestCommitStatuses: %v", err)
return
}
results := make([]*repo_service.WebSearchRepository, len(repos))
for i, repo := range repos {
latestCommitStatus := git_model.CalcCommitStatus(repoToItsLatestCommitStatuses[repo.ID])
results[i] = &repo_service.WebSearchRepository{
Repository: &api.Repository{
ID: repo.ID,
@ -671,8 +656,11 @@ func SearchRepo(ctx *context.Context) {
Link: repo.Link(),
Internal: !repo.IsPrivate && repo.Owner.Visibility == api.VisibleTypePrivate,
},
LatestCommitStatus: latestCommitStatus,
LocaleLatestCommitStatus: latestCommitStatus.LocaleString(ctx.Locale),
}
if latestCommitStatuses[i] != nil {
results[i].LatestCommitStatus = latestCommitStatuses[i]
results[i].LocaleLatestCommitStatus = latestCommitStatuses[i].LocaleString(ctx.Locale)
}
}

@ -0,0 +1,135 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package commitstatus
import (
"context"
"crypto/sha256"
"fmt"
"code.gitea.io/gitea/models/db"
git_model "code.gitea.io/gitea/models/git"
repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/cache"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/log"
api "code.gitea.io/gitea/modules/structs"
"code.gitea.io/gitea/services/automerge"
)
func getCacheKey(repoID int64, brancheName string) string {
hashBytes := sha256.Sum256([]byte(fmt.Sprintf("%d:%s", repoID, brancheName)))
return fmt.Sprintf("commit_status:%x", hashBytes)
}
func updateCommitStatusCache(ctx context.Context, repoID int64, branchName string, status api.CommitStatusState) error {
c := cache.GetCache()
return c.Put(getCacheKey(repoID, branchName), string(status), 3*24*60)
}
func deleteCommitStatusCache(ctx context.Context, repoID int64, branchName string) error {
c := cache.GetCache()
return c.Delete(getCacheKey(repoID, branchName))
}
// CreateCommitStatus creates a new CommitStatus given a bunch of parameters
// NOTE: All text-values will be trimmed from whitespaces.
// Requires: Repo, Creator, SHA
func CreateCommitStatus(ctx context.Context, repo *repo_model.Repository, creator *user_model.User, sha string, status *git_model.CommitStatus) error {
repoPath := repo.RepoPath()
// confirm that commit is exist
gitRepo, closer, err := gitrepo.RepositoryFromContextOrOpen(ctx, repo)
if err != nil {
return fmt.Errorf("OpenRepository[%s]: %w", repoPath, err)
}
defer closer.Close()
objectFormat := git.ObjectFormatFromName(repo.ObjectFormatName)
commit, err := gitRepo.GetCommit(sha)
if err != nil {
return fmt.Errorf("GetCommit[%s]: %w", sha, err)
}
if len(sha) != objectFormat.FullLength() {
// use complete commit sha
sha = commit.ID.String()
}
if err := git_model.NewCommitStatus(ctx, git_model.NewCommitStatusOptions{
Repo: repo,
Creator: creator,
SHA: commit.ID,
CommitStatus: status,
}); err != nil {
return fmt.Errorf("NewCommitStatus[repo_id: %d, user_id: %d, sha: %s]: %w", repo.ID, creator.ID, sha, err)
}
defaultBranchCommit, err := gitRepo.GetBranchCommit(repo.DefaultBranch)
if err != nil {
return fmt.Errorf("GetBranchCommit[%s]: %w", repo.DefaultBranch, err)
}
if commit.ID.String() == defaultBranchCommit.ID.String() { // since one commit status updated, the combined commit status should be invalid
if err := deleteCommitStatusCache(ctx, repo.ID, repo.DefaultBranch); err != nil {
log.Error("deleteCommitStatusCache[%d:%s] failed: %v", repo.ID, repo.DefaultBranch, err)
}
}
if status.State.IsSuccess() {
if err := automerge.MergeScheduledPullRequest(ctx, sha, repo); err != nil {
return fmt.Errorf("MergeScheduledPullRequest[repo_id: %d, user_id: %d, sha: %s]: %w", repo.ID, creator.ID, sha, err)
}
}
return nil
}
// FindReposLastestCommitStatuses loading repository default branch latest combinded commit status with cache
func FindReposLastestCommitStatuses(ctx context.Context, repos []*repo_model.Repository) ([]*git_model.CommitStatus, error) {
results := make([]*git_model.CommitStatus, len(repos))
c := cache.GetCache()
for i, repo := range repos {
status, ok := c.Get(getCacheKey(repo.ID, repo.DefaultBranch)).(string)
if ok && status != "" {
results[i] = &git_model.CommitStatus{State: api.CommitStatusState(status)}
}
}
// collect the latest commit of each repo
// at most there are dozens of repos (limited by MaxResponseItems), so it's not a big problem at the moment
repoBranchNames := make(map[int64]string, len(repos))
for i, repo := range repos {
if results[i] == nil {
repoBranchNames[repo.ID] = repo.DefaultBranch
}
}
repoIDsToLatestCommitSHAs, err := git_model.FindBranchesByRepoAndBranchName(ctx, repoBranchNames)
if err != nil {
return nil, fmt.Errorf("FindBranchesByRepoAndBranchName: %v", err)
}
// call the database O(1) times to get the commit statuses for all repos
repoToItsLatestCommitStatuses, err := git_model.GetLatestCommitStatusForPairs(ctx, repoIDsToLatestCommitSHAs, db.ListOptionsAll)
if err != nil {
return nil, fmt.Errorf("GetLatestCommitStatusForPairs: %v", err)
}
for i, repo := range repos {
if results[i] == nil {
results[i] = git_model.CalcCommitStatus(repoToItsLatestCommitStatuses[repo.ID])
if results[i].State != "" {
if err := updateCommitStatusCache(ctx, repo.ID, repo.DefaultBranch, results[i].State); err != nil {
log.Error("updateCommitStatusCache[%d:%s] failed: %v", repo.ID, repo.DefaultBranch, err)
}
}
}
}
return results, nil
}

@ -5,61 +5,13 @@ package files
import (
"context"
"fmt"
asymkey_model "code.gitea.io/gitea/models/asymkey"
git_model "code.gitea.io/gitea/models/git"
repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/structs"
"code.gitea.io/gitea/services/automerge"
)
// CreateCommitStatus creates a new CommitStatus given a bunch of parameters
// NOTE: All text-values will be trimmed from whitespaces.
// Requires: Repo, Creator, SHA
func CreateCommitStatus(ctx context.Context, repo *repo_model.Repository, creator *user_model.User, sha string, status *git_model.CommitStatus) error {
repoPath := repo.RepoPath()
// confirm that commit is exist
gitRepo, closer, err := gitrepo.RepositoryFromContextOrOpen(ctx, repo)
if err != nil {
return fmt.Errorf("OpenRepository[%s]: %w", repoPath, err)
}
defer closer.Close()
objectFormat := git.ObjectFormatFromName(repo.ObjectFormatName)
commit, err := gitRepo.GetCommit(sha)
if err != nil {
gitRepo.Close()
return fmt.Errorf("GetCommit[%s]: %w", sha, err)
} else if len(sha) != objectFormat.FullLength() {
// use complete commit sha
sha = commit.ID.String()
}
gitRepo.Close()
if err := git_model.NewCommitStatus(ctx, git_model.NewCommitStatusOptions{
Repo: repo,
Creator: creator,
SHA: commit.ID,
CommitStatus: status,
}); err != nil {
return fmt.Errorf("NewCommitStatus[repo_id: %d, user_id: %d, sha: %s]: %w", repo.ID, creator.ID, sha, err)
}
if status.State.IsSuccess() {
if err := automerge.MergeScheduledPullRequest(ctx, sha, repo); err != nil {
return fmt.Errorf("MergeScheduledPullRequest[repo_id: %d, user_id: %d, sha: %s]: %w", repo.ID, creator.ID, sha, err)
}
}
return nil
}
// CountDivergingCommits determines how many commits a branch is ahead or behind the repository's base branch
func CountDivergingCommits(ctx context.Context, repo *repo_model.Repository, branch string) (*git.DivergeObject, error) {
divergence, err := git.GetDivergingCommits(ctx, repo.RepoPath(), repo.DefaultBranch, branch)