forkjo/services/migrations/dump.go
Lunny Xiao 6e19484f4d
Sync branches into databases ()
Related 
Related  
Related 
Close 
Related  

This PR will change all the branches retrieve method from reading git
data to read database to reduce git read operations.

- [x] Sync git branches information into database when push git data
- [x] Create a new table `Branch`, merge some columns of `DeletedBranch`
into `Branch` table and drop the table `DeletedBranch`.
- [x] Read `Branch` table when visit `code` -> `branch` page
- [x] Read `Branch` table when list branch names in `code` page dropdown
- [x] Read `Branch` table when list git ref compare page
- [x] Provide a button in admin page to manually sync all branches.
- [x] Sync branches if repository is not empty but database branches are
empty when visiting pages with branches list
- [x] Use `commit_time desc` as the default FindBranch order by to keep
consistent as before and deleted branches will be always at the end.

---------

Co-authored-by: Jason Song <i@wolfogre.com>
2023-06-29 10:03:20 +00:00

737 lines
20 KiB
Go

// Copyright 2020 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package migrations
import (
"context"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"os"
"path/filepath"
"strconv"
"strings"
"time"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
base "code.gitea.io/gitea/modules/migration"
"code.gitea.io/gitea/modules/repository"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/structs"
"github.com/google/uuid"
"gopkg.in/yaml.v3"
)
var _ base.Uploader = &RepositoryDumper{}
// RepositoryDumper implements an Uploader to the local directory
type RepositoryDumper struct {
ctx context.Context
baseDir string
repoOwner string
repoName string
opts base.MigrateOptions
milestoneFile *os.File
labelFile *os.File
releaseFile *os.File
issueFile *os.File
commentFiles map[int64]*os.File
pullrequestFile *os.File
reviewFiles map[int64]*os.File
gitRepo *git.Repository
prHeadCache map[string]string
}
// NewRepositoryDumper creates an gitea Uploader
func NewRepositoryDumper(ctx context.Context, baseDir, repoOwner, repoName string, opts base.MigrateOptions) (*RepositoryDumper, error) {
baseDir = filepath.Join(baseDir, repoOwner, repoName)
if err := os.MkdirAll(baseDir, os.ModePerm); err != nil {
return nil, err
}
return &RepositoryDumper{
ctx: ctx,
opts: opts,
baseDir: baseDir,
repoOwner: repoOwner,
repoName: repoName,
prHeadCache: make(map[string]string),
commentFiles: make(map[int64]*os.File),
reviewFiles: make(map[int64]*os.File),
}, nil
}
// MaxBatchInsertSize returns the table's max batch insert size
func (g *RepositoryDumper) MaxBatchInsertSize(tp string) int {
return 1000
}
func (g *RepositoryDumper) gitPath() string {
return filepath.Join(g.baseDir, "git")
}
func (g *RepositoryDumper) wikiPath() string {
return filepath.Join(g.baseDir, "wiki")
}
func (g *RepositoryDumper) commentDir() string {
return filepath.Join(g.baseDir, "comments")
}
func (g *RepositoryDumper) reviewDir() string {
return filepath.Join(g.baseDir, "reviews")
}
func (g *RepositoryDumper) setURLToken(remoteAddr string) (string, error) {
if len(g.opts.AuthToken) > 0 || len(g.opts.AuthUsername) > 0 {
u, err := url.Parse(remoteAddr)
if err != nil {
return "", err
}
u.User = url.UserPassword(g.opts.AuthUsername, g.opts.AuthPassword)
if len(g.opts.AuthToken) > 0 {
u.User = url.UserPassword("oauth2", g.opts.AuthToken)
}
remoteAddr = u.String()
}
return remoteAddr, nil
}
// CreateRepo creates a repository
func (g *RepositoryDumper) CreateRepo(repo *base.Repository, opts base.MigrateOptions) error {
f, err := os.Create(filepath.Join(g.baseDir, "repo.yml"))
if err != nil {
return err
}
defer f.Close()
bs, err := yaml.Marshal(map[string]interface{}{
"name": repo.Name,
"owner": repo.Owner,
"description": repo.Description,
"clone_addr": opts.CloneAddr,
"original_url": repo.OriginalURL,
"is_private": opts.Private,
"service_type": opts.GitServiceType,
"wiki": opts.Wiki,
"issues": opts.Issues,
"milestones": opts.Milestones,
"labels": opts.Labels,
"releases": opts.Releases,
"comments": opts.Comments,
"pulls": opts.PullRequests,
"assets": opts.ReleaseAssets,
})
if err != nil {
return err
}
if _, err := f.Write(bs); err != nil {
return err
}
repoPath := g.gitPath()
if err := os.MkdirAll(repoPath, os.ModePerm); err != nil {
return err
}
migrateTimeout := 2 * time.Hour
remoteAddr, err := g.setURLToken(repo.CloneURL)
if err != nil {
return err
}
err = git.Clone(g.ctx, remoteAddr, repoPath, git.CloneRepoOptions{
Mirror: true,
Quiet: true,
Timeout: migrateTimeout,
SkipTLSVerify: setting.Migrations.SkipTLSVerify,
})
if err != nil {
return fmt.Errorf("Clone: %w", err)
}
if err := git.WriteCommitGraph(g.ctx, repoPath); err != nil {
return err
}
if opts.Wiki {
wikiPath := g.wikiPath()
wikiRemotePath := repository.WikiRemoteURL(g.ctx, remoteAddr)
if len(wikiRemotePath) > 0 {
if err := os.MkdirAll(wikiPath, os.ModePerm); err != nil {
return fmt.Errorf("Failed to remove %s: %w", wikiPath, err)
}
if err := git.Clone(g.ctx, wikiRemotePath, wikiPath, git.CloneRepoOptions{
Mirror: true,
Quiet: true,
Timeout: migrateTimeout,
Branch: "master",
SkipTLSVerify: setting.Migrations.SkipTLSVerify,
}); err != nil {
log.Warn("Clone wiki: %v", err)
if err := os.RemoveAll(wikiPath); err != nil {
return fmt.Errorf("Failed to remove %s: %w", wikiPath, err)
}
} else if err := git.WriteCommitGraph(g.ctx, wikiPath); err != nil {
return err
}
}
}
g.gitRepo, err = git.OpenRepository(g.ctx, g.gitPath())
return err
}
// Close closes this uploader
func (g *RepositoryDumper) Close() {
if g.gitRepo != nil {
g.gitRepo.Close()
}
if g.milestoneFile != nil {
g.milestoneFile.Close()
}
if g.labelFile != nil {
g.labelFile.Close()
}
if g.releaseFile != nil {
g.releaseFile.Close()
}
if g.issueFile != nil {
g.issueFile.Close()
}
for _, f := range g.commentFiles {
f.Close()
}
if g.pullrequestFile != nil {
g.pullrequestFile.Close()
}
for _, f := range g.reviewFiles {
f.Close()
}
}
// CreateTopics creates topics
func (g *RepositoryDumper) CreateTopics(topics ...string) error {
f, err := os.Create(filepath.Join(g.baseDir, "topic.yml"))
if err != nil {
return err
}
defer f.Close()
bs, err := yaml.Marshal(map[string]interface{}{
"topics": topics,
})
if err != nil {
return err
}
if _, err := f.Write(bs); err != nil {
return err
}
return nil
}
// CreateMilestones creates milestones
func (g *RepositoryDumper) CreateMilestones(milestones ...*base.Milestone) error {
var err error
if g.milestoneFile == nil {
g.milestoneFile, err = os.Create(filepath.Join(g.baseDir, "milestone.yml"))
if err != nil {
return err
}
}
bs, err := yaml.Marshal(milestones)
if err != nil {
return err
}
if _, err := g.milestoneFile.Write(bs); err != nil {
return err
}
return nil
}
// CreateLabels creates labels
func (g *RepositoryDumper) CreateLabels(labels ...*base.Label) error {
var err error
if g.labelFile == nil {
g.labelFile, err = os.Create(filepath.Join(g.baseDir, "label.yml"))
if err != nil {
return err
}
}
bs, err := yaml.Marshal(labels)
if err != nil {
return err
}
if _, err := g.labelFile.Write(bs); err != nil {
return err
}
return nil
}
// CreateReleases creates releases
func (g *RepositoryDumper) CreateReleases(releases ...*base.Release) error {
if g.opts.ReleaseAssets {
for _, release := range releases {
attachDir := filepath.Join("release_assets", release.TagName)
if err := os.MkdirAll(filepath.Join(g.baseDir, attachDir), os.ModePerm); err != nil {
return err
}
for _, asset := range release.Assets {
attachLocalPath := filepath.Join(attachDir, asset.Name)
// SECURITY: We cannot check the DownloadURL and DownloadFunc are safe here
// ... we must assume that they are safe and simply download the attachment
// download attachment
err := func(attachPath string) error {
var rc io.ReadCloser
var err error
if asset.DownloadURL == nil {
rc, err = asset.DownloadFunc()
if err != nil {
return err
}
} else {
resp, err := http.Get(*asset.DownloadURL)
if err != nil {
return err
}
rc = resp.Body
}
defer rc.Close()
fw, err := os.Create(attachPath)
if err != nil {
return fmt.Errorf("create: %w", err)
}
defer fw.Close()
_, err = io.Copy(fw, rc)
return err
}(filepath.Join(g.baseDir, attachLocalPath))
if err != nil {
return err
}
asset.DownloadURL = &attachLocalPath // to save the filepath on the yml file, change the source
}
}
}
var err error
if g.releaseFile == nil {
g.releaseFile, err = os.Create(filepath.Join(g.baseDir, "release.yml"))
if err != nil {
return err
}
}
bs, err := yaml.Marshal(releases)
if err != nil {
return err
}
if _, err := g.releaseFile.Write(bs); err != nil {
return err
}
return nil
}
// SyncTags syncs releases with tags in the database
func (g *RepositoryDumper) SyncTags() error {
return nil
}
// CreateIssues creates issues
func (g *RepositoryDumper) CreateIssues(issues ...*base.Issue) error {
var err error
if g.issueFile == nil {
g.issueFile, err = os.Create(filepath.Join(g.baseDir, "issue.yml"))
if err != nil {
return err
}
}
bs, err := yaml.Marshal(issues)
if err != nil {
return err
}
if _, err := g.issueFile.Write(bs); err != nil {
return err
}
return nil
}
func (g *RepositoryDumper) createItems(dir string, itemFiles map[int64]*os.File, itemsMap map[int64][]interface{}) error {
if err := os.MkdirAll(dir, os.ModePerm); err != nil {
return err
}
for number, items := range itemsMap {
if err := g.encodeItems(number, items, dir, itemFiles); err != nil {
return err
}
}
return nil
}
func (g *RepositoryDumper) encodeItems(number int64, items []interface{}, dir string, itemFiles map[int64]*os.File) error {
itemFile := itemFiles[number]
if itemFile == nil {
var err error
itemFile, err = os.Create(filepath.Join(dir, fmt.Sprintf("%d.yml", number)))
if err != nil {
return err
}
itemFiles[number] = itemFile
}
encoder := yaml.NewEncoder(itemFile)
defer encoder.Close()
return encoder.Encode(items)
}
// CreateComments creates comments of issues
func (g *RepositoryDumper) CreateComments(comments ...*base.Comment) error {
commentsMap := make(map[int64][]interface{}, len(comments))
for _, comment := range comments {
commentsMap[comment.IssueIndex] = append(commentsMap[comment.IssueIndex], comment)
}
return g.createItems(g.commentDir(), g.commentFiles, commentsMap)
}
func (g *RepositoryDumper) handlePullRequest(pr *base.PullRequest) error {
// SECURITY: this pr must have been ensured safe
if !pr.EnsuredSafe {
log.Error("PR #%d in %s/%s has not been checked for safety ... We will ignore this.", pr.Number, g.repoOwner, g.repoName)
return fmt.Errorf("unsafe PR #%d", pr.Number)
}
// First we download the patch file
err := func() error {
// if the patchURL is empty there is nothing to download
if pr.PatchURL == "" {
return nil
}
// SECURITY: We will assume that the pr.PatchURL has been checked
// pr.PatchURL maybe a local file - but note EnsureSafe should be asserting that this safe
u, err := g.setURLToken(pr.PatchURL)
if err != nil {
return err
}
// SECURITY: We will assume that the pr.PatchURL has been checked
// pr.PatchURL maybe a local file - but note EnsureSafe should be asserting that this safe
resp, err := http.Get(u) // TODO: This probably needs to use the downloader as there may be rate limiting issues here
if err != nil {
return err
}
defer resp.Body.Close()
pullDir := filepath.Join(g.gitPath(), "pulls")
if err = os.MkdirAll(pullDir, os.ModePerm); err != nil {
return err
}
fPath := filepath.Join(pullDir, fmt.Sprintf("%d.patch", pr.Number))
f, err := os.Create(fPath)
if err != nil {
return err
}
defer f.Close()
// TODO: Should there be limits on the size of this file?
if _, err = io.Copy(f, resp.Body); err != nil {
return err
}
pr.PatchURL = "git/pulls/" + fmt.Sprintf("%d.patch", pr.Number)
return nil
}()
if err != nil {
log.Error("PR #%d in %s/%s unable to download patch: %v", pr.Number, g.repoOwner, g.repoName, err)
return err
}
isFork := pr.IsForkPullRequest()
// Even if it's a forked repo PR, we have to change head info as the same as the base info
oldHeadOwnerName := pr.Head.OwnerName
pr.Head.OwnerName, pr.Head.RepoName = pr.Base.OwnerName, pr.Base.RepoName
if !isFork || pr.State == "closed" {
return nil
}
// OK we want to fetch the current head as a branch from its CloneURL
// 1. Is there a head clone URL available?
// 2. Is there a head ref available?
if pr.Head.CloneURL == "" || pr.Head.Ref == "" {
// Set head information if pr.Head.SHA is available
if pr.Head.SHA != "" {
_, _, err = git.NewCommand(g.ctx, "update-ref", "--no-deref").AddDynamicArguments(pr.GetGitRefName(), pr.Head.SHA).RunStdString(&git.RunOpts{Dir: g.gitPath()})
if err != nil {
log.Error("PR #%d in %s/%s unable to update-ref for pr HEAD: %v", pr.Number, g.repoOwner, g.repoName, err)
}
}
return nil
}
// 3. We need to create a remote for this clone url
// ... maybe we already have a name for this remote
remote, ok := g.prHeadCache[pr.Head.CloneURL+":"]
if !ok {
// ... let's try ownername as a reasonable name
remote = oldHeadOwnerName
if !git.IsValidRefPattern(remote) {
// ... let's try something less nice
remote = "head-pr-" + strconv.FormatInt(pr.Number, 10)
}
// ... now add the remote
err := g.gitRepo.AddRemote(remote, pr.Head.CloneURL, true)
if err != nil {
log.Error("PR #%d in %s/%s AddRemote[%s] failed: %v", pr.Number, g.repoOwner, g.repoName, remote, err)
} else {
g.prHeadCache[pr.Head.CloneURL+":"] = remote
ok = true
}
}
if !ok {
// Set head information if pr.Head.SHA is available
if pr.Head.SHA != "" {
_, _, err = git.NewCommand(g.ctx, "update-ref", "--no-deref").AddDynamicArguments(pr.GetGitRefName(), pr.Head.SHA).RunStdString(&git.RunOpts{Dir: g.gitPath()})
if err != nil {
log.Error("PR #%d in %s/%s unable to update-ref for pr HEAD: %v", pr.Number, g.repoOwner, g.repoName, err)
}
}
return nil
}
// 4. Check if we already have this ref?
localRef, ok := g.prHeadCache[pr.Head.CloneURL+":"+pr.Head.Ref]
if !ok {
// ... We would normally name this migrated branch as <OwnerName>/<HeadRef> but we need to ensure that is safe
localRef = git.SanitizeRefPattern(oldHeadOwnerName + "/" + pr.Head.Ref)
// ... Now we must assert that this does not exist
if g.gitRepo.IsBranchExist(localRef) {
localRef = "head-pr-" + strconv.FormatInt(pr.Number, 10) + "/" + localRef
i := 0
for g.gitRepo.IsBranchExist(localRef) {
if i > 5 {
// ... We tried, we really tried but this is just a seriously unfriendly repo
return fmt.Errorf("unable to create unique local reference from %s", pr.Head.Ref)
}
// OK just try some uuids!
localRef = git.SanitizeRefPattern("head-pr-" + strconv.FormatInt(pr.Number, 10) + uuid.New().String())
i++
}
}
fetchArg := pr.Head.Ref + ":" + git.BranchPrefix + localRef
if strings.HasPrefix(fetchArg, "-") {
fetchArg = git.BranchPrefix + fetchArg
}
_, _, err = git.NewCommand(g.ctx, "fetch", "--no-tags").AddDashesAndList(remote, fetchArg).RunStdString(&git.RunOpts{Dir: g.gitPath()})
if err != nil {
log.Error("Fetch branch from %s failed: %v", pr.Head.CloneURL, err)
// We need to continue here so that the Head.Ref is reset and we attempt to set the gitref for the PR
// (This last step will likely fail but we should try to do as much as we can.)
} else {
// Cache the localRef as the Head.Ref - if we've failed we can always try again.
g.prHeadCache[pr.Head.CloneURL+":"+pr.Head.Ref] = localRef
}
}
// Set the pr.Head.Ref to the localRef
pr.Head.Ref = localRef
// 5. Now if pr.Head.SHA == "" we should recover this to the head of this branch
if pr.Head.SHA == "" {
headSha, err := g.gitRepo.GetBranchCommitID(localRef)
if err != nil {
log.Error("unable to get head SHA of local head for PR #%d from %s in %s/%s. Error: %v", pr.Number, pr.Head.Ref, g.repoOwner, g.repoName, err)
return nil
}
pr.Head.SHA = headSha
}
if pr.Head.SHA != "" {
_, _, err = git.NewCommand(g.ctx, "update-ref", "--no-deref").AddDynamicArguments(pr.GetGitRefName(), pr.Head.SHA).RunStdString(&git.RunOpts{Dir: g.gitPath()})
if err != nil {
log.Error("unable to set %s as the local head for PR #%d from %s in %s/%s. Error: %v", pr.Head.SHA, pr.Number, pr.Head.Ref, g.repoOwner, g.repoName, err)
}
}
return nil
}
// CreatePullRequests creates pull requests
func (g *RepositoryDumper) CreatePullRequests(prs ...*base.PullRequest) error {
var err error
if g.pullrequestFile == nil {
if err := os.MkdirAll(g.baseDir, os.ModePerm); err != nil {
return err
}
g.pullrequestFile, err = os.Create(filepath.Join(g.baseDir, "pull_request.yml"))
if err != nil {
return err
}
}
encoder := yaml.NewEncoder(g.pullrequestFile)
defer encoder.Close()
count := 0
for i := 0; i < len(prs); i++ {
pr := prs[i]
if err := g.handlePullRequest(pr); err != nil {
log.Error("PR #%d in %s/%s failed - skipping", pr.Number, g.repoOwner, g.repoName, err)
continue
}
prs[count] = pr
count++
}
prs = prs[:count]
return encoder.Encode(prs)
}
// CreateReviews create pull request reviews
func (g *RepositoryDumper) CreateReviews(reviews ...*base.Review) error {
reviewsMap := make(map[int64][]interface{}, len(reviews))
for _, review := range reviews {
reviewsMap[review.IssueIndex] = append(reviewsMap[review.IssueIndex], review)
}
return g.createItems(g.reviewDir(), g.reviewFiles, reviewsMap)
}
// Rollback when migrating failed, this will rollback all the changes.
func (g *RepositoryDumper) Rollback() error {
g.Close()
return os.RemoveAll(g.baseDir)
}
// Finish when migrating succeed, this will update something.
func (g *RepositoryDumper) Finish() error {
return nil
}
// DumpRepository dump repository according MigrateOptions to a local directory
func DumpRepository(ctx context.Context, baseDir, ownerName string, opts base.MigrateOptions) error {
doer, err := user_model.GetAdminUser(ctx)
if err != nil {
return err
}
downloader, err := newDownloader(ctx, ownerName, opts)
if err != nil {
return err
}
uploader, err := NewRepositoryDumper(ctx, baseDir, ownerName, opts.RepoName, opts)
if err != nil {
return err
}
if err := migrateRepository(doer, downloader, uploader, opts, nil); err != nil {
if err1 := uploader.Rollback(); err1 != nil {
log.Error("rollback failed: %v", err1)
}
return err
}
return nil
}
func updateOptionsUnits(opts *base.MigrateOptions, units []string) error {
if len(units) == 0 {
opts.Wiki = true
opts.Issues = true
opts.Milestones = true
opts.Labels = true
opts.Releases = true
opts.Comments = true
opts.PullRequests = true
opts.ReleaseAssets = true
} else {
for _, unit := range units {
switch strings.ToLower(strings.TrimSpace(unit)) {
case "":
continue
case "wiki":
opts.Wiki = true
case "issues":
opts.Issues = true
case "milestones":
opts.Milestones = true
case "labels":
opts.Labels = true
case "releases":
opts.Releases = true
case "release_assets":
opts.ReleaseAssets = true
case "comments":
opts.Comments = true
case "pull_requests":
opts.PullRequests = true
default:
return errors.New("invalid unit: " + unit)
}
}
}
return nil
}
// RestoreRepository restore a repository from the disk directory
func RestoreRepository(ctx context.Context, baseDir, ownerName, repoName string, units []string, validation bool) error {
doer, err := user_model.GetAdminUser(ctx)
if err != nil {
return err
}
uploader := NewGiteaLocalUploader(ctx, doer, ownerName, repoName)
downloader, err := NewRepositoryRestorer(ctx, baseDir, ownerName, repoName, validation)
if err != nil {
return err
}
opts, err := downloader.getRepoOptions()
if err != nil {
return err
}
tp, _ := strconv.Atoi(opts["service_type"])
migrateOpts := base.MigrateOptions{
GitServiceType: structs.GitServiceType(tp),
}
if err := updateOptionsUnits(&migrateOpts, units); err != nil {
return err
}
if err = migrateRepository(doer, downloader, uploader, migrateOpts, nil); err != nil {
if err1 := uploader.Rollback(); err1 != nil {
log.Error("rollback failed: %v", err1)
}
return err
}
return updateMigrationPosterIDByGitService(ctx, structs.GitServiceType(tp))
}